//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));

static cl::opt<bool>
    EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
                       cl::desc("DAG combiner enable merging multiple stores "
                                "into a wider store"));

static cl::opt<unsigned> TokenFactorInlineLimit(
    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
    cl::desc("Limit the number of operands to inline for Token Factors"));

static cl::opt<unsigned> StoreMergeDependenceLimit(
    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
    cl::desc("Limit the number of times for the same StoreNode and RootNode "
             "to bail out in store merging dependence check"));

static cl::opt<bool> EnableReduceLoadOpStoreWidth(
    "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable reducing the width of load/op/store "
             "sequence"));

static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
    "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable load/<replace bytes>/store with "
             "a narrower store"));

namespace {

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    const SelectionDAGTargetInfo *STI;
    CombineLevel Level;
    CodeGenOpt::Level OptLevel;
    bool LegalDAG = false;
    bool LegalOperations = false;
    bool LegalTypes = false;
    bool ForCodeSize;
    bool DisableGenericCombines;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;
    /// This records all nodes that we have attempted to add to the worklist
    /// since we last considered a new worklist entry. Because we do not add
    /// duplicate nodes to the worklist, this may differ from the tail of the
    /// worklist.
    SmallSetVector<SDNode *, 32> PruningList;

    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// Map from candidate StoreNode to the pair of RootNode and count.
    /// The count is used to track how many times we have seen the StoreNode
    /// with the same RootNode bail out in dependence check. If we have seen
    /// the bail out for the same pair many times over a limit, we won't
    /// consider the StoreNode with the same RootNode as store merging
    /// candidate again.
    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }
    /// Convenient shorthand to add a node and all of its users to the
    /// worklist.
    void AddToWorklistWithUsers(SDNode *N) {
      AddUsersToWorklist(N);
      AddToWorklist(N);
    }

    // Prune potentially dangling nodes. This is called after
    // any visit to a node, but should also be called during a visit after any
    // failed combine which may have created a DAG node.
    void clearAddedDanglingWorklistEntries() {
      // Check any nodes added to the worklist to see if they are prunable.
      while (!PruningList.empty()) {
        auto *N = PruningList.pop_back_val();
        if (N->use_empty())
          recursivelyDeleteUnusedNodes(N);
      }
    }

    SDNode *getNextWorklistEntry() {
      // Before we do any work, remove nodes that are not in use.
      clearAddedDanglingWorklistEntries();
      SDNode *N = nullptr;
      // The Worklist holds the SDNodes in order, but it may contain null
      // entries.
      while (!N && !Worklist.empty()) {
        N = Worklist.pop_back_val();
      }

      if (N) {
        bool GoodWorklistEntry = WorklistMap.erase(N);
        (void)GoodWorklistEntry;
        assert(GoodWorklistEntry &&
               "Found a worklist entry without a corresponding map entry!");
      }
      return N;
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()),
          STI(D.getSubtarget().getSelectionDAGInfo()),
          Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.shouldOptForSize();
      DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);

      MaximumLegalStoreInBits = 0;
      // We use the minimum store size here, since that's all we can guarantee
      // for the scalable vector types.
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
    }

    void ConsiderForPruning(SDNode *N) {
      // Mark this for potential pruning.
      PruningList.insert(N);
    }
    /// Add to the worklist, making sure its instance is at the back (next to
    /// be processed).
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      ConsiderForPruning(N);

      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);
      PruningList.remove(N);
      StoreRootCountMap.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified
    /// or if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt DemandedBits = APInt::getAllOnes(BitWidth);
      return SimplifyDemandedBits(Op, DemandedBits);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
      TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
      KnownBits Known;
      if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
        return false;

      // Revisit the node.
      AddToWorklist(Op.getNode());

      CommitTargetLoweringOpt(TLO);
      return true;
    }

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      // TODO: For now just pretend it cannot be simplified.
      if (Op.getValueType().isScalableVector())
        return false;

      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt DemandedElts = APInt::getAllOnes(NumElts);
      return SimplifyDemandedVectorElts(Op, DemandedElts);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                              const APInt &DemandedElts,
                              bool AssumeSingleUse = false);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                    bool AssumeSingleUse = false);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    // Scalars have size 0 to distinguish from singleton vectors.
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success, SDValue() on failure.
    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                         SDValue EltNo,
                                         LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDNode *N);
    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDSAT(SDNode *N);
    SDValue visitSUBSAT(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitSADDO_CARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitSSUBO_CARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue visitMULFIX(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitFunnelShift(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitAssertAlign(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitFREEZE(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitSTRICT_FADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitFPOW(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitFMINIMUM(SDNode *N);
    SDValue visitFMAXIMUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitLIFETIME_END(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);
    SDValue visitVECREDUCE(SDNode *N);
    SDValue visitVPOp(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                    const SDLoc &DL, SDValue N0,
                                                    SDValue N1);
    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                      SDValue N1);
    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);

    SDValue visitShiftByConstant(SDNode *N);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue convertSelectOfFPConstantsToLoadOffset(
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
        ISD::CondCode CC);
    SDValue foldSignChangeInBitcast(SDNode *N);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldSelectOfBinops(SDNode *N);
    SDValue foldSextSetcc(SDNode *N);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC, bool MatchStrict = false) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
    SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue mergeTruncStores(StoreSDNode *N);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecTruncToBitCast(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx,
                                  bool DidSplitVec);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool mayAlias(SDNode *Op0, SDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    // Helper for findBetterNeighborChains. Walk up the store chain, adding
    // additional chained stores that do not overlap and can be parallelized.
    bool parallelizeChainedStores(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    // Classify the origin of a stored value.
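    // For example, a store of (extract_vector_elt v, 0) is classified as
    // StoreSource::Extract, which makes a run of such scalar stores a
    // candidate for merging into a single vector store.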
    enum class StoreSource { Unknown, Constant, Extract, Load };
    StoreSource getStoreSource(SDValue StoreVal) {
      switch (StoreVal.getOpcode()) {
      case ISD::Constant:
      case ISD::ConstantFP:
        return StoreSource::Constant;
      case ISD::EXTRACT_VECTOR_ELT:
      case ISD::EXTRACT_SUBVECTOR:
        return StoreSource::Extract;
      case ISD::LOAD:
        return StoreSource::Load;
      default:
        return StoreSource::Unknown;
      }
    }

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N);

    /// Helper function for mergeConsecutiveStores which merges the component
    /// store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for mergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store introducing
    /// bitcasts if necessary.  \return True if a merged store was created.
    bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for mergeConsecutiveStores. Stores that
    /// potentially may be merged with St are placed in StoreNodes. RootNode is
    /// a chain predecessor to all store candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for mergeConsecutiveStores. Checks if candidate stores
    /// have indirect dependency through their operands. RootNode is the
    /// predecessor to all stores calculated by getStoreMergeCandidates and is
    /// used to prune the dependency check. \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// This is a helper function for mergeConsecutiveStores. Given a list of
    /// store candidates, find the first N that are consecutive in memory.
    /// Returns 0 if there are not at least 2 consecutive stores to try merging.
    unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  int64_t ElementSizeBytes) const;

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of constant values.
    bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  unsigned NumConsecutiveStores,
                                  EVT MemVT, SDNode *Root, bool AllowVectors);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of extracted vector elements.
    /// When extracting multiple vector elements, try to store them in one
    /// vector store rather than a sequence of scalar stores.
    bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                 unsigned NumConsecutiveStores, EVT MemVT,
                                 SDNode *Root);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of loaded values.
    bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumConsecutiveStores, EVT MemVT,
                              SDNode *Root, bool AllowVectors,
                              bool IsNonTemporalStore, bool IsNonTemporalLoad);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return true if stores were merged.
    bool mergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

    /// Helper function to determine whether the target supports operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
    }

  public:
    /// Runs the dag combiner on all nodes in the worklist.
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before
    /// type legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  //        compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

bool TargetLowering::DAGCombinerInfo::
recursivelyDeleteUnusedNodes(SDNode *N) {
  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

// APInts must be the same size for most operations; this helper function
// zero-extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
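// For example, an i8 LHS and an i16 RHS with Offset == 1 are both extended to
// 17 bits, leaving one bit of headroom for arithmetic on the widened values.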
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Bits);
  RHS = RHS.zextOrSelf(Bits);
}

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
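// For example, (select_cc lhs, rhs, -1, 0, cc) on a target whose boolean
// "true" value is all ones and "false" value is zero is equivalent to
// (setcc lhs, rhs, cc), so LHS/RHS/CC are taken from the select_cc operands.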
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC, bool MatchStrict) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (MatchStrict &&
      (N.getOpcode() == ISD::STRICT_FSETCC ||
       N.getOpcode() == ISD::STRICT_FSETCCS)) {
    LHS = N.getOperand(1);
    RHS = N.getOperand(2);
    CC  = N.getOperand(3);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    return true;
  return false;
}

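// Return true if \p N is a constant splat of the all-ones mask for
// \p ScalarTy; for example, a v4i16 splat of 0xFFFF is the mask for MVT::i16.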
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
  if (!ScalarTy.isSimple())
    return false;

  uint64_t MaskForTy = 0ULL;
  switch (ScalarTy.getSimpleVT().SimpleTy) {
  case MVT::i8:
    MaskForTy = 0xFFULL;
    break;
  case MVT::i16:
    MaskForTy = 0xFFFFULL;
    break;
  case MVT::i32:
    MaskForTy = 0xFFFFFFFFULL;
    break;
  default:
    return false;
  }

  APInt Val;
  if (ISD::isConstantSplatVector(N, Val))
    return Val.getLimitedValue() == MaskForTy;

  return false;
}

// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
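// For example, (build_vector 0, 1, undef, 3) qualifies, while a build_vector
// whose constant operands are wider than its scalar element type does not.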
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed
// with undefs.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

// Determine if splitting the indexing from this indexed load is permitted:
// the option must be enabled and the index must not be an opaque target
// constant.
static bool canSplitIdx(LoadSDNode *LD) {
  return MaySplitLoadIndex &&
         (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
}

bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                             const SDLoc &DL,
                                                             SDValue N0,
                                                             SDValue N1) {
  // Currently this only tries to ensure we don't undo the GEP splits done by
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
  // we check if the following transformation would be problematic:
  // (load/store (add, (add, x, offset1), offset2)) ->
  // (load/store (add, x, offset1+offset2)).
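  // For example, if x[offset2] is a legal addressing mode but
  // x[offset1+offset2] is not, the combined constant could no longer be
  // folded into the memory operand, so we report that reassociation can
  // break the pattern.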

  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
    return false;

  if (N0.hasOneUse())
    return false;

  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  auto *C2 = dyn_cast<ConstantSDNode>(N1);
  if (!C1 || !C2)
    return false;

  const APInt &C1APIntVal = C1->getAPIntValue();
  const APInt &C2APIntVal = C2->getAPIntValue();
  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
    return false;

  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
  if (CombinedValueIntVal.getBitWidth() > 64)
    return false;
  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

  for (SDNode *Node : N0->uses()) {
    if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
      // Is x[offset2] already not a legal addressing mode? If so then
      // reassociating the constants breaks nothing (we test offset2 because
      // that's the one we hope to fold into the load or store).
      TargetLoweringBase::AddrMode AM;
      AM.HasBaseReg = true;
      AM.BaseOffs = C2APIntVal.getSExtValue();
      EVT VT = LoadStore->getMemoryVT();
      unsigned AS = LoadStore->getAddressSpace();
      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        continue;

      // Would x[offset1+offset2] still be a legal addressing mode?
      AM.BaseOffs = CombinedValue;
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        return true;
    }
  }

  return false;
}

// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                               SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();

  if (N0.getOpcode() != Opc)
    return SDValue();

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
    if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
      if (SDValue OpNode =
              DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
      return SDValue();
    }
    if (N0.hasOneUse()) {
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
      //              iff (op x, c1) has one use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
      if (!OpNode.getNode())
        return SDValue();
      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
    }
  }
  return SDValue();
}

// Try to reassociate commutative binops.
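// For example, (add (add x, c1), c2) can become (add x, c1+c2). Trying both
// operand orders below also catches the mirrored form (add c2, (add x, c1)).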
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");

  // Floating-point reassociation is not allowed without loose FP math.
  if (N0.getValueType().isFloatingPoint() ||
      N1.getValueType().isFloatingPoint())
    if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
      return SDValue();

  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
    return Combined;
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
    return Combined;
  return SDValue();
}

SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklistWithUsers(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                       const APInt &DemandedElts,
                                       bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
                                AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
                                             const APInt &DemandedElts,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
                                      TLO, 0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  CommitTargetLoweringOpt(TLO);
  return true;
}

void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

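// Promote \p Op to type \p PVT. \p Replace is set when \p Op was an unindexed
// load that must be replaced via ReplaceLoadWithPromotedLoad. For example, a
// plain i16 load promoted to i32 is rebuilt as an any-extending i32 load of
// the same i16 memory, so the promoted user needs no separate extend.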
1210 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1211   Replace = false;
1212   SDLoc DL(Op);
1213   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1214     LoadSDNode *LD = cast<LoadSDNode>(Op);
1215     EVT MemVT = LD->getMemoryVT();
1216     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1217                                                       : LD->getExtensionType();
1218     Replace = true;
1219     return DAG.getExtLoad(ExtType, DL, PVT,
1220                           LD->getChain(), LD->getBasePtr(),
1221                           MemVT, LD->getMemOperand());
1222   }
1223 
1224   unsigned Opc = Op.getOpcode();
1225   switch (Opc) {
1226   default: break;
1227   case ISD::AssertSext:
1228     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1229       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1230     break;
1231   case ISD::AssertZext:
1232     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1233       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1234     break;
1235   case ISD::Constant: {
1236     unsigned ExtOpc =
1237       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1238     return DAG.getNode(ExtOpc, DL, PVT, Op);
1239   }
1240   }
1241 
1242   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1243     return SDValue();
1244   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1245 }
1246 
1247 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1248   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1249     return SDValue();
1250   EVT OldVT = Op.getValueType();
1251   SDLoc DL(Op);
1252   bool Replace = false;
1253   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1254   if (!NewOp.getNode())
1255     return SDValue();
1256   AddToWorklist(NewOp.getNode());
1257 
1258   if (Replace)
1259     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1260   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1261                      DAG.getValueType(OldVT));
1262 }
1263 
1264 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1265   EVT OldVT = Op.getValueType();
1266   SDLoc DL(Op);
1267   bool Replace = false;
1268   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1269   if (!NewOp.getNode())
1270     return SDValue();
1271   AddToWorklist(NewOp.getNode());
1272 
1273   if (Replace)
1274     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1275   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1276 }
1277 
/// Promote the specified integer binary operation if the target indicates it
/// is beneficial. E.g., on x86 it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
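///
/// For such a target this would, roughly speaking, rewrite
///   (i16 add x, y)
/// into
///   (i16 truncate (i32 add (i32 any_extend x), (i32 any_extend y))).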
1281 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1282   if (!LegalOperations)
1283     return SDValue();
1284 
1285   EVT VT = Op.getValueType();
1286   if (VT.isVector() || !VT.isInteger())
1287     return SDValue();
1288 
  // If the operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
1291   unsigned Opc = Op.getOpcode();
1292   if (TLI.isTypeDesirableForOp(Opc, VT))
1293     return SDValue();
1294 
1295   EVT PVT = VT;
  // Consult the target whether it is a good idea to promote this operation
  // and what's the right type to promote it to.
1298   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1299     assert(PVT != VT && "Don't know what type to promote to!");
1300 
1301     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1302 
1303     bool Replace0 = false;
1304     SDValue N0 = Op.getOperand(0);
1305     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1306 
1307     bool Replace1 = false;
1308     SDValue N1 = Op.getOperand(1);
1309     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1310     SDLoc DL(Op);
1311 
1312     SDValue RV =
1313         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1314 
1315     // We are always replacing N0/N1's use in N and only need additional
1316     // replacements if there are additional uses.
1317     // Note: We are checking uses of the *nodes* (SDNode) rather than values
1318     //       (SDValue) here because the node may reference multiple values
1319     //       (for example, the chain value of a load node).
1320     Replace0 &= !N0->hasOneUse();
1321     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1322 
1323     // Combine Op here so it is preserved past replacements.
1324     CombineTo(Op.getNode(), RV);
1325 
    // If the operands have a use ordering, make sure we deal with the
    // predecessor first.
1328     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1329       std::swap(N0, N1);
1330       std::swap(NN0, NN1);
1331     }
1332 
1333     if (Replace0) {
1334       AddToWorklist(NN0.getNode());
1335       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1336     }
1337     if (Replace1) {
1338       AddToWorklist(NN1.getNode());
1339       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1340     }
1341     return Op;
1342   }
1343   return SDValue();
1344 }
1345 
/// Promote the specified integer shift operation if the target indicates it
/// is beneficial. E.g., on x86 it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
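///
/// Only the value being shifted is promoted: SRA requires a sign-extended
/// operand and SRL a zero-extended one, so e.g. (i16 srl x, c) becomes,
/// roughly, (i16 truncate (i32 srl (zero-extended x), c)).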
1349 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1350   if (!LegalOperations)
1351     return SDValue();
1352 
1353   EVT VT = Op.getValueType();
1354   if (VT.isVector() || !VT.isInteger())
1355     return SDValue();
1356 
  // If the operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
1359   unsigned Opc = Op.getOpcode();
1360   if (TLI.isTypeDesirableForOp(Opc, VT))
1361     return SDValue();
1362 
1363   EVT PVT = VT;
  // Consult the target whether it is a good idea to promote this operation
  // and what's the right type to promote it to.
1366   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1367     assert(PVT != VT && "Don't know what type to promote to!");
1368 
1369     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1370 
1371     bool Replace = false;
1372     SDValue N0 = Op.getOperand(0);
1373     SDValue N1 = Op.getOperand(1);
1374     if (Opc == ISD::SRA)
1375       N0 = SExtPromoteOperand(N0, PVT);
1376     else if (Opc == ISD::SRL)
1377       N0 = ZExtPromoteOperand(N0, PVT);
1378     else
1379       N0 = PromoteOperand(N0, PVT, Replace);
1380 
1381     if (!N0.getNode())
1382       return SDValue();
1383 
1384     SDLoc DL(Op);
1385     SDValue RV =
1386         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1387 
1388     if (Replace)
1389       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1390 
1391     // Deal with Op being deleted.
1392     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1393       return RV;
1394   }
1395   return SDValue();
1396 }
1397 
1398 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1399   if (!LegalOperations)
1400     return SDValue();
1401 
1402   EVT VT = Op.getValueType();
1403   if (VT.isVector() || !VT.isInteger())
1404     return SDValue();
1405 
  // If the operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
1408   unsigned Opc = Op.getOpcode();
1409   if (TLI.isTypeDesirableForOp(Opc, VT))
1410     return SDValue();
1411 
1412   EVT PVT = VT;
  // Consult the target whether it is a good idea to promote this operation
  // and what's the right type to promote it to.
1415   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1416     assert(PVT != VT && "Don't know what type to promote to!");
1417     // fold (aext (aext x)) -> (aext x)
1418     // fold (aext (zext x)) -> (zext x)
1419     // fold (aext (sext x)) -> (sext x)
1420     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1421     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1422   }
1423   return SDValue();
1424 }
1425 
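/// Promote an unindexed integer load to a wider, target-preferred type by
/// rebuilding it as an extending load of the same memory type and truncating
/// the loaded value back to the original type, roughly:
///   (i16 load p)  -->  (i16 truncate (i32 extload p))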
1426 bool DAGCombiner::PromoteLoad(SDValue Op) {
1427   if (!LegalOperations)
1428     return false;
1429 
1430   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1431     return false;
1432 
1433   EVT VT = Op.getValueType();
1434   if (VT.isVector() || !VT.isInteger())
1435     return false;
1436 
  // If the operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
1439   unsigned Opc = Op.getOpcode();
1440   if (TLI.isTypeDesirableForOp(Opc, VT))
1441     return false;
1442 
1443   EVT PVT = VT;
  // Consult the target whether it is a good idea to promote this operation
  // and what's the right type to promote it to.
1446   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1447     assert(PVT != VT && "Don't know what type to promote to!");
1448 
1449     SDLoc DL(Op);
1450     SDNode *N = Op.getNode();
1451     LoadSDNode *LD = cast<LoadSDNode>(N);
1452     EVT MemVT = LD->getMemoryVT();
1453     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1454                                                       : LD->getExtensionType();
1455     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1456                                    LD->getChain(), LD->getBasePtr(),
1457                                    MemVT, LD->getMemOperand());
1458     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1459 
1460     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1461                Result.getNode()->dump(&DAG); dbgs() << '\n');
1462     WorklistRemover DeadNodes(*this);
1463     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1464     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1465     deleteAndRecombine(N);
1466     AddToWorklist(Result.getNode());
1467     return true;
1468   }
1469   return false;
1470 }
1471 
1472 /// Recursively delete a node which has no uses and any operands for
1473 /// which it is the only use.
1474 ///
1475 /// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes which have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
1478 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1479   if (!N->use_empty())
1480     return false;
1481 
1482   SmallSetVector<SDNode *, 16> Nodes;
1483   Nodes.insert(N);
1484   do {
1485     N = Nodes.pop_back_val();
1486     if (!N)
1487       continue;
1488 
1489     if (N->use_empty()) {
1490       for (const SDValue &ChildN : N->op_values())
1491         Nodes.insert(ChildN.getNode());
1492 
1493       removeFromWorklist(N);
1494       DAG.DeleteNode(N);
1495     } else {
1496       AddToWorklist(N);
1497     }
1498   } while (!Nodes.empty());
1499   return true;
1500 }
1501 
1502 //===----------------------------------------------------------------------===//
1503 //  Main DAG Combiner implementation
1504 //===----------------------------------------------------------------------===//
1505 
1506 void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables so that the various visit routines may use
  // them.
1508   Level = AtLevel;
1509   LegalDAG = Level >= AfterLegalizeDAG;
1510   LegalOperations = Level >= AfterLegalizeVectorOps;
1511   LegalTypes = Level >= AfterLegalizeTypes;
1512 
1513   WorklistInserter AddNodes(*this);
1514 
1515   // Add all the dag nodes to the worklist.
1516   for (SDNode &Node : DAG.allnodes())
1517     AddToWorklist(&Node);
1518 
  // Create a dummy node (which is not added to allnodes) that adds a reference
1520   // to the root node, preventing it from being deleted, and tracking any
1521   // changes of the root.
1522   HandleSDNode Dummy(DAG.getRoot());
1523 
1524   // While we have a valid worklist entry node, try to combine it.
1525   while (SDNode *N = getNextWorklistEntry()) {
1526     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1527     // N is deleted from the DAG, since they too may now be dead or may have a
1528     // reduced number of uses, allowing other xforms.
1529     if (recursivelyDeleteUnusedNodes(N))
1530       continue;
1531 
1532     WorklistRemover DeadNodes(*this);
1533 
1534     // If this combine is running after legalizing the DAG, re-legalize any
1535     // nodes pulled off the worklist.
1536     if (LegalDAG) {
1537       SmallSetVector<SDNode *, 16> UpdatedNodes;
1538       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1539 
1540       for (SDNode *LN : UpdatedNodes)
1541         AddToWorklistWithUsers(LN);
1542 
1543       if (!NIsValid)
1544         continue;
1545     }
1546 
1547     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1548 
1549     // Add any operands of the new node which have not yet been combined to the
1550     // worklist as well. Because the worklist uniques things already, this
1551     // won't repeatedly process the same operand.
1552     CombinedNodes.insert(N);
1553     for (const SDValue &ChildN : N->op_values())
1554       if (!CombinedNodes.count(ChildN.getNode()))
1555         AddToWorklist(ChildN.getNode());
1556 
1557     SDValue RV = combine(N);
1558 
1559     if (!RV.getNode())
1560       continue;
1561 
1562     ++NodesCombined;
1563 
1564     // If we get back the same node we passed in, rather than a new node or
1565     // zero, we know that the node must have defined multiple values and
1566     // CombineTo was used.  Since CombineTo takes care of the worklist
1567     // mechanics for us, we have no work to do in this case.
1568     if (RV.getNode() == N)
1569       continue;
1570 
1571     assert(N->getOpcode() != ISD::DELETED_NODE &&
1572            RV.getOpcode() != ISD::DELETED_NODE &&
1573            "Node was deleted but visit returned new node!");
1574 
1575     LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1576 
1577     if (N->getNumValues() == RV.getNode()->getNumValues())
1578       DAG.ReplaceAllUsesWith(N, RV.getNode());
1579     else {
1580       assert(N->getValueType(0) == RV.getValueType() &&
1581              N->getNumValues() == 1 && "Type mismatch");
1582       DAG.ReplaceAllUsesWith(N, &RV);
1583     }
1584 
1585     // Push the new node and any users onto the worklist.  Omit this if the
1586     // new node is the EntryToken (e.g. if a store managed to get optimized
1587     // out), because re-visiting the EntryToken and its users will not uncover
1588     // any additional opportunities, but there may be a large number of such
1589     // users, potentially causing compile time explosion.
1590     if (RV.getOpcode() != ISD::EntryToken) {
1591       AddToWorklist(RV.getNode());
1592       AddUsersToWorklist(RV.getNode());
1593     }
1594 
1595     // Finally, if the node is now dead, remove it from the graph.  The node
1596     // may not be dead if the replacement process recursively simplified to
1597     // something else needing this node. This will also take care of adding any
1598     // operands which have lost a user to the worklist.
1599     recursivelyDeleteUnusedNodes(N);
1600   }
1601 
  // If the root changed (e.g. it was a dead load), update the root.
1603   DAG.setRoot(Dummy.getValue());
1604   DAG.RemoveDeadNodes();
1605 }
1606 
1607 SDValue DAGCombiner::visit(SDNode *N) {
1608   switch (N->getOpcode()) {
1609   default: break;
1610   case ISD::TokenFactor:        return visitTokenFactor(N);
1611   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1612   case ISD::ADD:                return visitADD(N);
1613   case ISD::SUB:                return visitSUB(N);
1614   case ISD::SADDSAT:
1615   case ISD::UADDSAT:            return visitADDSAT(N);
1616   case ISD::SSUBSAT:
1617   case ISD::USUBSAT:            return visitSUBSAT(N);
1618   case ISD::ADDC:               return visitADDC(N);
1619   case ISD::SADDO:
1620   case ISD::UADDO:              return visitADDO(N);
1621   case ISD::SUBC:               return visitSUBC(N);
1622   case ISD::SSUBO:
1623   case ISD::USUBO:              return visitSUBO(N);
1624   case ISD::ADDE:               return visitADDE(N);
1625   case ISD::ADDCARRY:           return visitADDCARRY(N);
1626   case ISD::SADDO_CARRY:        return visitSADDO_CARRY(N);
1627   case ISD::SUBE:               return visitSUBE(N);
1628   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1629   case ISD::SSUBO_CARRY:        return visitSSUBO_CARRY(N);
1630   case ISD::SMULFIX:
1631   case ISD::SMULFIXSAT:
1632   case ISD::UMULFIX:
1633   case ISD::UMULFIXSAT:         return visitMULFIX(N);
1634   case ISD::MUL:                return visitMUL(N);
1635   case ISD::SDIV:               return visitSDIV(N);
1636   case ISD::UDIV:               return visitUDIV(N);
1637   case ISD::SREM:
1638   case ISD::UREM:               return visitREM(N);
1639   case ISD::MULHU:              return visitMULHU(N);
1640   case ISD::MULHS:              return visitMULHS(N);
1641   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1642   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1643   case ISD::SMULO:
1644   case ISD::UMULO:              return visitMULO(N);
1645   case ISD::SMIN:
1646   case ISD::SMAX:
1647   case ISD::UMIN:
1648   case ISD::UMAX:               return visitIMINMAX(N);
1649   case ISD::AND:                return visitAND(N);
1650   case ISD::OR:                 return visitOR(N);
1651   case ISD::XOR:                return visitXOR(N);
1652   case ISD::SHL:                return visitSHL(N);
1653   case ISD::SRA:                return visitSRA(N);
1654   case ISD::SRL:                return visitSRL(N);
1655   case ISD::ROTR:
1656   case ISD::ROTL:               return visitRotate(N);
1657   case ISD::FSHL:
1658   case ISD::FSHR:               return visitFunnelShift(N);
1659   case ISD::ABS:                return visitABS(N);
1660   case ISD::BSWAP:              return visitBSWAP(N);
1661   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1662   case ISD::CTLZ:               return visitCTLZ(N);
1663   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1664   case ISD::CTTZ:               return visitCTTZ(N);
1665   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1666   case ISD::CTPOP:              return visitCTPOP(N);
1667   case ISD::SELECT:             return visitSELECT(N);
1668   case ISD::VSELECT:            return visitVSELECT(N);
1669   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1670   case ISD::SETCC:              return visitSETCC(N);
1671   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1672   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1673   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1674   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1675   case ISD::AssertSext:
1676   case ISD::AssertZext:         return visitAssertExt(N);
1677   case ISD::AssertAlign:        return visitAssertAlign(N);
1678   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1679   case ISD::SIGN_EXTEND_VECTOR_INREG:
1680   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1681   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1682   case ISD::BITCAST:            return visitBITCAST(N);
1683   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1684   case ISD::FADD:               return visitFADD(N);
1685   case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
1686   case ISD::FSUB:               return visitFSUB(N);
1687   case ISD::FMUL:               return visitFMUL(N);
1688   case ISD::FMA:                return visitFMA(N);
1689   case ISD::FDIV:               return visitFDIV(N);
1690   case ISD::FREM:               return visitFREM(N);
1691   case ISD::FSQRT:              return visitFSQRT(N);
1692   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1693   case ISD::FPOW:               return visitFPOW(N);
1694   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1695   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1696   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1697   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1698   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1699   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1700   case ISD::FNEG:               return visitFNEG(N);
1701   case ISD::FABS:               return visitFABS(N);
1702   case ISD::FFLOOR:             return visitFFLOOR(N);
1703   case ISD::FMINNUM:            return visitFMINNUM(N);
1704   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1705   case ISD::FMINIMUM:           return visitFMINIMUM(N);
1706   case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
1707   case ISD::FCEIL:              return visitFCEIL(N);
1708   case ISD::FTRUNC:             return visitFTRUNC(N);
1709   case ISD::BRCOND:             return visitBRCOND(N);
1710   case ISD::BR_CC:              return visitBR_CC(N);
1711   case ISD::LOAD:               return visitLOAD(N);
1712   case ISD::STORE:              return visitSTORE(N);
1713   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1714   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1715   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1716   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1717   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1718   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1719   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1720   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1721   case ISD::MGATHER:            return visitMGATHER(N);
1722   case ISD::MLOAD:              return visitMLOAD(N);
1723   case ISD::MSCATTER:           return visitMSCATTER(N);
1724   case ISD::MSTORE:             return visitMSTORE(N);
1725   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
1726   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1727   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1728   case ISD::FREEZE:             return visitFREEZE(N);
1729   case ISD::VECREDUCE_FADD:
1730   case ISD::VECREDUCE_FMUL:
1731   case ISD::VECREDUCE_ADD:
1732   case ISD::VECREDUCE_MUL:
1733   case ISD::VECREDUCE_AND:
1734   case ISD::VECREDUCE_OR:
1735   case ISD::VECREDUCE_XOR:
1736   case ISD::VECREDUCE_SMAX:
1737   case ISD::VECREDUCE_SMIN:
1738   case ISD::VECREDUCE_UMAX:
1739   case ISD::VECREDUCE_UMIN:
1740   case ISD::VECREDUCE_FMAX:
1741   case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
1742 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1743 #include "llvm/IR/VPIntrinsics.def"
1744     return visitVPOp(N);
1745   }
1746   return SDValue();
1747 }
1748 
1749 SDValue DAGCombiner::combine(SDNode *N) {
1750   SDValue RV;
1751   if (!DisableGenericCombines)
1752     RV = visit(N);
1753 
1754   // If nothing happened, try a target-specific DAG combine.
1755   if (!RV.getNode()) {
1756     assert(N->getOpcode() != ISD::DELETED_NODE &&
1757            "Node was deleted but visit returned NULL!");
1758 
1759     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1760         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1761 
1762       // Expose the DAG combiner to the target combiner impls.
1763       TargetLowering::DAGCombinerInfo
1764         DagCombineInfo(DAG, Level, false, this);
1765 
1766       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1767     }
1768   }
1769 
  // If still nothing happened, try promoting the operation.
1771   if (!RV.getNode()) {
1772     switch (N->getOpcode()) {
1773     default: break;
1774     case ISD::ADD:
1775     case ISD::SUB:
1776     case ISD::MUL:
1777     case ISD::AND:
1778     case ISD::OR:
1779     case ISD::XOR:
1780       RV = PromoteIntBinOp(SDValue(N, 0));
1781       break;
1782     case ISD::SHL:
1783     case ISD::SRA:
1784     case ISD::SRL:
1785       RV = PromoteIntShiftOp(SDValue(N, 0));
1786       break;
1787     case ISD::SIGN_EXTEND:
1788     case ISD::ZERO_EXTEND:
1789     case ISD::ANY_EXTEND:
1790       RV = PromoteExtend(SDValue(N, 0));
1791       break;
1792     case ISD::LOAD:
1793       if (PromoteLoad(SDValue(N, 0)))
1794         RV = SDValue(N, 0);
1795       break;
1796     }
1797   }
1798 
1799   // If N is a commutative binary node, try to eliminate it if the commuted
1800   // version is already present in the DAG.
1801   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1802       N->getNumValues() == 1) {
1803     SDValue N0 = N->getOperand(0);
1804     SDValue N1 = N->getOperand(1);
1805 
1806     // Constant operands are canonicalized to RHS.
1807     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1808       SDValue Ops[] = {N1, N0};
1809       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1810                                             N->getFlags());
1811       if (CSENode)
1812         return SDValue(CSENode, 0);
1813     }
1814   }
1815 
1816   return RV;
1817 }
1818 
/// Given a node, return its input chain if it has one, otherwise return a
/// null SDValue.
1821 static SDValue getInputChainForNode(SDNode *N) {
1822   if (unsigned NumOps = N->getNumOperands()) {
1823     if (N->getOperand(0).getValueType() == MVT::Other)
1824       return N->getOperand(0);
1825     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1826       return N->getOperand(NumOps-1);
1827     for (unsigned i = 1; i < NumOps-1; ++i)
1828       if (N->getOperand(i).getValueType() == MVT::Other)
1829         return N->getOperand(i);
1830   }
1831   return SDValue();
1832 }
1833 
1834 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1835   // If N has two operands, where one has an input chain equal to the other,
1836   // the 'other' chain is redundant.
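  // E.g. in (TokenFactor t1, t0) where t1 is the chain of a load whose input
  // chain is t0, the load already orders itself after t0, so the token
  // factor can be replaced by t1.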
1837   if (N->getNumOperands() == 2) {
1838     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1839       return N->getOperand(0);
1840     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1841       return N->getOperand(1);
1842   }
1843 
1844   // Don't simplify token factors if optnone.
1845   if (OptLevel == CodeGenOpt::None)
1846     return SDValue();
1847 
1848   // Don't simplify the token factor if the node itself has too many operands.
1849   if (N->getNumOperands() > TokenFactorInlineLimit)
1850     return SDValue();
1851 
1852   // If the sole user is a token factor, we should make sure we have a
1853   // chance to merge them together. This prevents TF chains from inhibiting
1854   // optimizations.
1855   if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1856     AddToWorklist(*(N->use_begin()));
1857 
1858   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1859   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1860   SmallPtrSet<SDNode*, 16> SeenOps;
1861   bool Changed = false;             // If we should replace this token factor.
1862 
1863   // Start out with this token factor.
1864   TFs.push_back(N);
1865 
  // Iterate through token factors. TFs grows as new token factors are
  // encountered.
1868   for (unsigned i = 0; i < TFs.size(); ++i) {
1869     // Limit number of nodes to inline, to avoid quadratic compile times.
1870     // We have to add the outstanding Token Factors to Ops, otherwise we might
1871     // drop Ops from the resulting Token Factors.
1872     if (Ops.size() > TokenFactorInlineLimit) {
1873       for (unsigned j = i; j < TFs.size(); j++)
1874         Ops.emplace_back(TFs[j], 0);
1875       // Drop unprocessed Token Factors from TFs, so we do not add them to the
1876       // combiner worklist later.
1877       TFs.resize(i);
1878       break;
1879     }
1880 
1881     SDNode *TF = TFs[i];
1882     // Check each of the operands.
1883     for (const SDValue &Op : TF->op_values()) {
1884       switch (Op.getOpcode()) {
1885       case ISD::EntryToken:
1886         // Entry tokens don't need to be added to the list. They are
1887         // redundant.
1888         Changed = true;
1889         break;
1890 
1891       case ISD::TokenFactor:
1892         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1893           // Queue up for processing.
1894           TFs.push_back(Op.getNode());
1895           Changed = true;
1896           break;
1897         }
1898         LLVM_FALLTHROUGH;
1899 
1900       default:
1901         // Only add if it isn't already in the list.
1902         if (SeenOps.insert(Op.getNode()).second)
1903           Ops.push_back(Op);
1904         else
1905           Changed = true;
1906         break;
1907       }
1908     }
1909   }
1910 
1911   // Re-visit inlined Token Factors, to clean them up in case they have been
1912   // removed. Skip the first Token Factor, as this is the current node.
1913   for (unsigned i = 1, e = TFs.size(); i < e; i++)
1914     AddToWorklist(TFs[i]);
1915 
  // Remove nodes that are chained to another node in the list. Do so by
  // walking up chains breadth-first, stopping when we've seen another
  // operand. In general we must climb to the EntryNode, but we can exit early
  // if we find all remaining work is associated with just one operand, as no
  // further pruning is possible.
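  //
  // E.g. with Ops = [A, B], if A is reached while walking up B's chain then
  // any ordering A provides is already implied by B, so A can be pruned from
  // the replacement token factor.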
1921 
1922   // List of nodes to search through and original Ops from which they originate.
1923   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1924   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1925   SmallPtrSet<SDNode *, 16> SeenChains;
1926   bool DidPruneOps = false;
1927 
1928   unsigned NumLeftToConsider = 0;
1929   for (const SDValue &Op : Ops) {
1930     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1931     OpWorkCount.push_back(1);
1932   }
1933 
1934   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // search associated with it as coming from the current OpNumber.
1937     if (SeenOps.contains(Op)) {
1938       Changed = true;
1939       DidPruneOps = true;
1940       unsigned OrigOpNumber = 0;
1941       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1942         OrigOpNumber++;
1943       assert((OrigOpNumber != Ops.size()) &&
1944              "expected to find TokenFactor Operand");
1945       // Re-mark worklist from OrigOpNumber to OpNumber
1946       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1947         if (Worklist[i].second == OrigOpNumber) {
1948           Worklist[i].second = OpNumber;
1949         }
1950       }
1951       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1952       OpWorkCount[OrigOpNumber] = 0;
1953       NumLeftToConsider--;
1954     }
1955     // Add if it's a new chain
1956     if (SeenChains.insert(Op).second) {
1957       OpWorkCount[OpNumber]++;
1958       Worklist.push_back(std::make_pair(Op, OpNumber));
1959     }
1960   };
1961 
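  // Walk up the chains breadth-first, with a hard cap on the number of nodes
  // visited to bound compile time.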
1962   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops to prune.
1964     if (NumLeftToConsider <= 1)
1965       break;
1966     auto CurNode = Worklist[i].first;
1967     auto CurOpNumber = Worklist[i].second;
1968     assert((OpWorkCount[CurOpNumber] > 0) &&
1969            "Node should not appear in worklist");
1970     switch (CurNode->getOpcode()) {
1971     case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate
      // without hitting another operand's search. Prevent us from marking
      // this operand considered.
1976       NumLeftToConsider++;
1977       break;
1978     case ISD::TokenFactor:
1979       for (const SDValue &Op : CurNode->op_values())
1980         AddToWorklist(i, Op.getNode(), CurOpNumber);
1981       break;
1982     case ISD::LIFETIME_START:
1983     case ISD::LIFETIME_END:
1984     case ISD::CopyFromReg:
1985     case ISD::CopyToReg:
1986       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1987       break;
1988     default:
1989       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1990         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1991       break;
1992     }
1993     OpWorkCount[CurOpNumber]--;
1994     if (OpWorkCount[CurOpNumber] == 0)
1995       NumLeftToConsider--;
1996   }
1997 
1998   // If we've changed things around then replace token factor.
1999   if (Changed) {
2000     SDValue Result;
2001     if (Ops.empty()) {
2002       // The entry token is the only possible outcome.
2003       Result = DAG.getEntryNode();
2004     } else {
2005       if (DidPruneOps) {
2006         SmallVector<SDValue, 8> PrunedOps;
2008         for (const SDValue &Op : Ops) {
2009           if (SeenChains.count(Op.getNode()) == 0)
2010             PrunedOps.push_back(Op);
2011         }
2012         Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2013       } else {
2014         Result = DAG.getTokenFactor(SDLoc(N), Ops);
2015       }
2016     }
2017     return Result;
2018   }
2019   return SDValue();
2020 }
2021 
2022 /// MERGE_VALUES can always be eliminated.
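/// Each of its results is simply the corresponding operand, so every use can
/// be redirected to the operands and the node deleted.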
2023 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2024   WorklistRemover DeadNodes(*this);
2025   // Replacing results may cause a different MERGE_VALUES to suddenly
2026   // be CSE'd with N, and carry its uses with it. Iterate until no
2027   // uses remain, to ensure that the node can be safely deleted.
2028   // First add the users of this node to the work list so that they
2029   // can be tried again once they have new operands.
2030   AddUsersToWorklist(N);
2031   do {
2032     // Do as a single replacement to avoid rewalking use lists.
2033     SmallVector<SDValue, 8> Ops;
2034     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2035       Ops.push_back(N->getOperand(i));
2036     DAG.ReplaceAllUsesWith(N, Ops.data());
2037   } while (!N->use_empty());
2038   deleteAndRecombine(N);
2039   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2040 }
2041 
/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
/// ConstantSDNode pointer, else nullptr.
2044 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2045   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2046   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2047 }
2048 
2049 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2050 /// and that N may be folded in the load / store addressing mode.
2051 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2052                                     const TargetLowering &TLI) {
2053   EVT VT;
2054   unsigned AS;
2055 
2056   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2057     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2058       return false;
2059     VT = LD->getMemoryVT();
2060     AS = LD->getAddressSpace();
2061   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2062     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2063       return false;
2064     VT = ST->getMemoryVT();
2065     AS = ST->getAddressSpace();
2066   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2067     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2068       return false;
2069     VT = LD->getMemoryVT();
2070     AS = LD->getAddressSpace();
2071   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2072     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2073       return false;
2074     VT = ST->getMemoryVT();
2075     AS = ST->getAddressSpace();
2076   } else
2077     return false;
2078 
2079   TargetLowering::AddrMode AM;
2080   if (N->getOpcode() == ISD::ADD) {
2081     AM.HasBaseReg = true;
2082     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2083     if (Offset)
2084       // [reg +/- imm]
2085       AM.BaseOffs = Offset->getSExtValue();
2086     else
2087       // [reg +/- reg]
2088       AM.Scale = 1;
2089   } else if (N->getOpcode() == ISD::SUB) {
2090     AM.HasBaseReg = true;
2091     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2092     if (Offset)
2093       // [reg +/- imm]
2094       AM.BaseOffs = -Offset->getSExtValue();
2095     else
2096       // [reg +/- reg]
2097       AM.Scale = 1;
2098   } else
2099     return false;
2100 
2101   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2102                                    VT.getTypeForEVT(*DAG.getContext()), AS);
2103 }
2104 
2105 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2106   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2107          "Unexpected binary operator");
2108 
2109   // Don't do this unless the old select is going away. We want to eliminate the
2110   // binary operator, not replace a binop with a select.
2111   // TODO: Handle ISD::SELECT_CC.
2112   unsigned SelOpNo = 0;
2113   SDValue Sel = BO->getOperand(0);
2114   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2115     SelOpNo = 1;
2116     Sel = BO->getOperand(1);
2117   }
2118 
2119   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2120     return SDValue();
2121 
2122   SDValue CT = Sel.getOperand(1);
2123   if (!isConstantOrConstantVector(CT, true) &&
2124       !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2125     return SDValue();
2126 
2127   SDValue CF = Sel.getOperand(2);
2128   if (!isConstantOrConstantVector(CF, true) &&
2129       !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2130     return SDValue();
2131 
  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1, in which case we can
  // propagate non-constant operands into the select. I.e.:
2135   // and (select Cond, 0, -1), X --> select Cond, 0, X
2136   // or X, (select Cond, -1, 0) --> select Cond, -1, X
2137   auto BinOpcode = BO->getOpcode();
2138   bool CanFoldNonConst =
2139       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2140       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2141       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2142 
2143   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2144   if (!CanFoldNonConst &&
2145       !isConstantOrConstantVector(CBO, true) &&
2146       !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2147     return SDValue();
2148 
2149   EVT VT = BO->getValueType(0);
2150 
2151   // We have a select-of-constants followed by a binary operator with a
2152   // constant. Eliminate the binop by pulling the constant math into the select.
2153   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2154   SDLoc DL(Sel);
2155   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2156                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2157   if (!CanFoldNonConst && !NewCT.isUndef() &&
2158       !isConstantOrConstantVector(NewCT, true) &&
2159       !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2160     return SDValue();
2161 
2162   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2163                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2164   if (!CanFoldNonConst && !NewCF.isUndef() &&
2165       !isConstantOrConstantVector(NewCF, true) &&
2166       !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2167     return SDValue();
2168 
2169   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2170   SelectOp->setFlags(BO->getFlags());
2171   return SelectOp;
2172 }
2173 
2174 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2175   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2176          "Expecting add or sub");
2177 
2178   // Match a constant operand and a zext operand for the math instruction:
2179   // add Z, C
2180   // sub C, Z
2181   bool IsAdd = N->getOpcode() == ISD::ADD;
2182   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2183   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2184   auto *CN = dyn_cast<ConstantSDNode>(C);
2185   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2186     return SDValue();
2187 
2188   // Match the zext operand as a setcc of a boolean.
2189   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2190       Z.getOperand(0).getValueType() != MVT::i1)
2191     return SDValue();
2192 
2193   // Match the compare as: setcc (X & 1), 0, eq.
2194   SDValue SetCC = Z.getOperand(0);
2195   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2196   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2197       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2198       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2199     return SDValue();
2200 
2201   // We are adding/subtracting a constant and an inverted low bit. Turn that
2202   // into a subtract/add of the low bit with incremented/decremented constant:
2203   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2204   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2205   EVT VT = C.getValueType();
2206   SDLoc DL(N);
2207   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2208   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2209                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2210   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2211 }
2212 
/// Try to fold an add/sub with a constant operand, where the other operand is
/// a shifted-down 'not' of the sign bit, into a shift and add with a
/// different constant.
2215 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2216   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2217          "Expecting add or sub");
2218 
2219   // We need a constant operand for the add/sub, and the other operand is a
2220   // logical shift right: add (srl), C or sub C, (srl).
2221   bool IsAdd = N->getOpcode() == ISD::ADD;
2222   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2223   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2224   if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2225       ShiftOp.getOpcode() != ISD::SRL)
2226     return SDValue();
2227 
2228   // The shift must be of a 'not' value.
2229   SDValue Not = ShiftOp.getOperand(0);
2230   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2231     return SDValue();
2232 
2233   // The shift must be moving the sign bit to the least-significant-bit.
2234   EVT VT = ShiftOp.getValueType();
2235   SDValue ShAmt = ShiftOp.getOperand(1);
2236   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2237   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2238     return SDValue();
2239 
2240   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2241   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2242   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2243   SDLoc DL(N);
2244   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2245   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2246   if (SDValue NewC =
2247           DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2248                                      {ConstantOp, DAG.getConstant(1, DL, VT)}))
2249     return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2250   return SDValue();
2251 }
2252 
/// Try to fold a node that behaves like an ADD. Note that N isn't necessarily
/// an ISD::ADD here; it could, for example, be an ISD::OR if we know that
/// there are no common bits set in the operands.
2256 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2257   SDValue N0 = N->getOperand(0);
2258   SDValue N1 = N->getOperand(1);
2259   EVT VT = N0.getValueType();
2260   SDLoc DL(N);
2261 
2262   // fold vector ops
2263   if (VT.isVector()) {
2264     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2265       return FoldedVOp;
2266 
2267     // fold (add x, 0) -> x, vector edition
2268     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2269       return N0;
2270     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2271       return N1;
2272   }
2273 
2274   // fold (add x, undef) -> undef
2275   if (N0.isUndef())
2276     return N0;
2277 
2278   if (N1.isUndef())
2279     return N1;
2280 
2281   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2282     // canonicalize constant to RHS
2283     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2284       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2285     // fold (add c1, c2) -> c1+c2
2286     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2287   }
2288 
2289   // fold (add x, 0) -> x
2290   if (isNullConstant(N1))
2291     return N0;
2292 
2293   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2294     // fold ((A-c1)+c2) -> (A+(c2-c1))
2295     if (N0.getOpcode() == ISD::SUB &&
2296         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2297       SDValue Sub =
2298           DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2299       assert(Sub && "Constant folding failed");
2300       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2301     }
2302 
2303     // fold ((c1-A)+c2) -> (c1+c2)-A
2304     if (N0.getOpcode() == ISD::SUB &&
2305         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2306       SDValue Add =
2307           DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2308       assert(Add && "Constant folding failed");
2309       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2310     }
2311 
2312     // add (sext i1 X), 1 -> zext (not i1 X)
2313     // We don't transform this pattern:
2314     //   add (zext i1 X), -1 -> sext (not i1 X)
2315     // because most (?) targets generate better code for the zext form.
2316     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2317         isOneOrOneSplat(N1)) {
2318       SDValue X = N0.getOperand(0);
2319       if ((!LegalOperations ||
2320            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2321             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2322           X.getScalarValueSizeInBits() == 1) {
2323         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2324         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2325       }
2326     }
2327 
2328     // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2329     // equivalent to (add x, c0).
2330     if (N0.getOpcode() == ISD::OR &&
2331         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2332         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2333       if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2334                                                     {N1, N0.getOperand(1)}))
2335         return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2336     }
2337   }
2338 
2339   if (SDValue NewSel = foldBinOpIntoSelect(N))
2340     return NewSel;
2341 
2342   // reassociate add
2343   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2344     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2345       return RADD;
2346 
    // Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
    // equivalent to (add x, c).
2349     auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2350       if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
2351           isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2352           DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2353         return DAG.getNode(ISD::ADD, DL, VT,
2354                            DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2355                            N0.getOperand(1));
2356       }
2357       return SDValue();
2358     };
2359     if (SDValue Add = ReassociateAddOr(N0, N1))
2360       return Add;
2361     if (SDValue Add = ReassociateAddOr(N1, N0))
2362       return Add;
2363   }
2364   // fold ((0-A) + B) -> B-A
2365   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2366     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2367 
2368   // fold (A + (0-B)) -> A-B
2369   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2370     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2371 
2372   // fold (A+(B-A)) -> B
2373   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2374     return N1.getOperand(0);
2375 
2376   // fold ((B-A)+A) -> B
2377   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2378     return N0.getOperand(0);
2379 
2380   // fold ((A-B)+(C-A)) -> (C-B)
2381   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2382       N0.getOperand(0) == N1.getOperand(1))
2383     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2384                        N0.getOperand(1));
2385 
2386   // fold ((A-B)+(B-C)) -> (A-C)
2387   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2388       N0.getOperand(1) == N1.getOperand(0))
2389     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2390                        N1.getOperand(1));
2391 
2392   // fold (A+(B-(A+C))) to (B-C)
2393   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2394       N0 == N1.getOperand(1).getOperand(0))
2395     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2396                        N1.getOperand(1).getOperand(1));
2397 
2398   // fold (A+(B-(C+A))) to (B-C)
2399   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2400       N0 == N1.getOperand(1).getOperand(1))
2401     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2402                        N1.getOperand(1).getOperand(0));
2403 
2404   // fold (A+((B-A)+or-C)) to (B+or-C)
2405   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2406       N1.getOperand(0).getOpcode() == ISD::SUB &&
2407       N0 == N1.getOperand(0).getOperand(1))
2408     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2409                        N1.getOperand(1));
2410 
2411   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2412   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2413     SDValue N00 = N0.getOperand(0);
2414     SDValue N01 = N0.getOperand(1);
2415     SDValue N10 = N1.getOperand(0);
2416     SDValue N11 = N1.getOperand(1);
2417 
2418     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2419       return DAG.getNode(ISD::SUB, DL, VT,
2420                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2421                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2422   }
2423 
2424   // fold (add (umax X, C), -C) --> (usubsat X, C)
2425   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2426     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2427       return (!Max && !Op) ||
2428              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2429     };
2430     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2431                                   /*AllowUndefs*/ true))
2432       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2433                          N0.getOperand(1));
2434   }
2435 
2436   if (SimplifyDemandedBits(SDValue(N, 0)))
2437     return SDValue(N, 0);
2438 
2439   if (isOneOrOneSplat(N1)) {
2440     // fold (add (xor a, -1), 1) -> (sub 0, a)
2441     if (isBitwiseNot(N0))
2442       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2443                          N0.getOperand(0));
2444 
2445     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2446     if (N0.getOpcode() == ISD::ADD) {
2447       SDValue A, Xor;
2448 
2449       if (isBitwiseNot(N0.getOperand(0))) {
2450         A = N0.getOperand(1);
2451         Xor = N0.getOperand(0);
2452       } else if (isBitwiseNot(N0.getOperand(1))) {
2453         A = N0.getOperand(0);
2454         Xor = N0.getOperand(1);
2455       }
2456 
2457       if (Xor)
2458         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2459     }
2460 
2461     // Look for:
2462     //   add (add x, y), 1
    // And if the target does not like this form then turn it into:
2464     //   sub y, (xor x, -1)
2465     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2466         N0.getOpcode() == ISD::ADD) {
2467       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2468                                 DAG.getAllOnesConstant(DL, VT));
2469       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2470     }
2471   }
2472 
2473   // (x - y) + -1  ->  add (xor y, -1), x
2474   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2475       isAllOnesOrAllOnesSplat(N1)) {
2476     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2477     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2478   }
2479 
2480   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2481     return Combined;
2482 
2483   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2484     return Combined;
2485 
2486   return SDValue();
2487 }
2488 
2489 SDValue DAGCombiner::visitADD(SDNode *N) {
2490   SDValue N0 = N->getOperand(0);
2491   SDValue N1 = N->getOperand(1);
2492   EVT VT = N0.getValueType();
2493   SDLoc DL(N);
2494 
2495   if (SDValue Combined = visitADDLike(N))
2496     return Combined;
2497 
2498   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2499     return V;
2500 
2501   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2502     return V;
2503 
2504   // fold (a+b) -> (a|b) iff a and b share no bits.
2505   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2506       DAG.haveNoCommonBitsSet(N0, N1))
2507     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2508 
2509   // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2510   if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2511     const APInt &C0 = N0->getConstantOperandAPInt(0);
2512     const APInt &C1 = N1->getConstantOperandAPInt(0);
2513     return DAG.getVScale(DL, VT, C0 + C1);
2514   }
2515 
2516   // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2517   if ((N0.getOpcode() == ISD::ADD) &&
2518       (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2519       (N1.getOpcode() == ISD::VSCALE)) {
2520     const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2521     const APInt &VS1 = N1->getConstantOperandAPInt(0);
2522     SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2523     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2524   }
2525 
  // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2).
2527   if (N0.getOpcode() == ISD::STEP_VECTOR &&
2528       N1.getOpcode() == ISD::STEP_VECTOR) {
2529     const APInt &C0 = N0->getConstantOperandAPInt(0);
2530     const APInt &C1 = N1->getConstantOperandAPInt(0);
2531     APInt NewStep = C0 + C1;
2532     return DAG.getStepVector(DL, VT, NewStep);
2533   }
2534 
2535   // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2536   if ((N0.getOpcode() == ISD::ADD) &&
2537       (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2538       (N1.getOpcode() == ISD::STEP_VECTOR)) {
2539     const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2540     const APInt &SV1 = N1->getConstantOperandAPInt(0);
2541     APInt NewStep = SV0 + SV1;
2542     SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2543     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2544   }
2545 
2546   return SDValue();
2547 }
2548 
2549 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2550   unsigned Opcode = N->getOpcode();
2551   SDValue N0 = N->getOperand(0);
2552   SDValue N1 = N->getOperand(1);
2553   EVT VT = N0.getValueType();
2554   SDLoc DL(N);
2555 
2556   // fold vector ops
2557   if (VT.isVector()) {
2558     // TODO SimplifyVBinOp
2559 
2560     // fold (add_sat x, 0) -> x, vector edition
2561     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2562       return N0;
2563     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2564       return N1;
2565   }
2566 
2567   // fold (add_sat x, undef) -> -1
2568   if (N0.isUndef() || N1.isUndef())
2569     return DAG.getAllOnesConstant(DL, VT);
2570 
2571   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2572     // canonicalize constant to RHS
2573     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2574       return DAG.getNode(Opcode, DL, VT, N1, N0);
2575     // fold (add_sat c1, c2) -> c3
2576     return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2577   }
2578 
2579   // fold (add_sat x, 0) -> x
2580   if (isNullConstant(N1))
2581     return N0;
2582 
2583   // If it cannot overflow, transform into an add.
2584   if (Opcode == ISD::UADDSAT)
2585     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2586       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2587 
2588   return SDValue();
2589 }
2590 
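/// Look through TRUNCATE, ZERO_EXTEND and AND-with-1 nodes to find a
/// carry-out value: result 1 of an ADDCARRY/SUBCARRY/UADDO/USUBO node that is
/// legal for the target and whose boolean is known to be 0 or 1 (or was
/// explicitly masked). Returns a null SDValue otherwise.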
2591 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2592   bool Masked = false;
2593 
2594   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2595   while (true) {
2596     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2597       V = V.getOperand(0);
2598       continue;
2599     }
2600 
2601     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2602       Masked = true;
2603       V = V.getOperand(0);
2604       continue;
2605     }
2606 
2607     break;
2608   }
2609 
2610   // If this is not a carry, return.
2611   if (V.getResNo() != 1)
2612     return SDValue();
2613 
2614   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2615       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2616     return SDValue();
2617 
2618   EVT VT = V.getNode()->getValueType(0);
2619   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2620     return SDValue();
2621 
  // If the result is masked, then we can return it no matter what kind of
  // bool it is. If it isn't, then we need to make sure the bool is known to
  // be either 0 or 1 and not some other value.
2625   if (Masked ||
2626       TLI.getBooleanContents(V.getValueType()) ==
2627           TargetLoweringBase::ZeroOrOneBooleanContent)
2628     return V;
2629 
2630   return SDValue();
2631 }
2632 
2633 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2634 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2635 /// the opcode and bypass the mask operation.
2636 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2637                                  SelectionDAG &DAG, const SDLoc &DL) {
2638   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2639     return SDValue();
2640 
2641   EVT VT = N0.getValueType();
2642   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2643     return SDValue();
2644 
2645   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2646   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2647   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2648 }
2649 
2650 /// Helper for doing combines based on N0 and N1 being added to each other.
2651 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
                                             SDNode *LocReference) {
2653   EVT VT = N0.getValueType();
2654   SDLoc DL(LocReference);
2655 
2656   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2657   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2658       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2659     return DAG.getNode(ISD::SUB, DL, VT, N0,
2660                        DAG.getNode(ISD::SHL, DL, VT,
2661                                    N1.getOperand(0).getOperand(1),
2662                                    N1.getOperand(1)));
2663 
2664   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2665     return V;
2666 
2667   // Look for:
2668   //   add (add x, 1), y
2669   // And if the target does not like this form then turn into:
2670   //   sub y, (xor x, -1)
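  // (~x == -x - 1, so y - ~x == x + y + 1, which matches (x + 1) + y.)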
2671   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2672       N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2673     SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2674                               DAG.getAllOnesConstant(DL, VT));
2675     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2676   }
2677 
2678   // Hoist one-use subtraction by non-opaque constant:
2679   //   (x - C) + y  ->  (x + y) - C
2680   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2681   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2682       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2683     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2684     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2685   }
2686   // Hoist one-use subtraction from non-opaque constant:
2687   //   (C - x) + y  ->  (y - x) + C
2688   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2689       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2690     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2691     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2692   }
2693 
2694   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2695   // rather than 'add 0/-1' (the zext should get folded).
2696   // add (sext i1 Y), X --> sub X, (zext i1 Y)
2697   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2698       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2699       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2700     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2701     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2702   }
2703 
2704   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
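  // ((sext_inreg Y, i1) is 0 or -1, which is the negation of (and Y, 1).)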
2705   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2706     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2707     if (TN->getVT() == MVT::i1) {
2708       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2709                                  DAG.getConstant(1, DL, VT));
2710       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2711     }
2712   }
2713 
2714   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2715   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2716       N1.getResNo() == 0)
2717     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2718                        N0, N1.getOperand(0), N1.getOperand(2));
2719 
2720   // (add X, Carry) -> (addcarry X, 0, Carry)
2721   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2722     if (SDValue Carry = getAsCarry(TLI, N1))
2723       return DAG.getNode(ISD::ADDCARRY, DL,
2724                          DAG.getVTList(VT, Carry.getValueType()), N0,
2725                          DAG.getConstant(0, DL, VT), Carry);
2726 
2727   return SDValue();
2728 }
2729 
2730 SDValue DAGCombiner::visitADDC(SDNode *N) {
2731   SDValue N0 = N->getOperand(0);
2732   SDValue N1 = N->getOperand(1);
2733   EVT VT = N0.getValueType();
2734   SDLoc DL(N);
2735 
2736   // If the flag result is dead, turn this into an ADD.
2737   if (!N->hasAnyUseOfValue(1))
2738     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2739                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2740 
2741   // canonicalize constant to RHS.
2742   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2743   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2744   if (N0C && !N1C)
2745     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2746 
2747   // fold (addc x, 0) -> x + no carry out
2748   if (isNullConstant(N1))
2749     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2750                                         DL, MVT::Glue));
2751 
2752   // If it cannot overflow, transform into an add.
2753   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2754     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2755                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2756 
2757   return SDValue();
2758 }
2759 
2760 /**
 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2762  * then the flip also occurs if computing the inverse is the same cost.
2763  * This function returns an empty SDValue in case it cannot flip the boolean
2764  * without increasing the cost of the computation. If you want to flip a boolean
2765  * no matter what, use DAG.getLogicalNOT.
2766  */
2767 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2768                                   const TargetLowering &TLI,
2769                                   bool Force) {
2770   if (Force && isa<ConstantSDNode>(V))
2771     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2772 
2773   if (V.getOpcode() != ISD::XOR)
2774     return SDValue();
2775 
2776   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2777   if (!Const)
2778     return SDValue();
2779 
2780   EVT VT = V.getValueType();
2781 
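  // Check whether XOR with this constant inverts the boolean under the
  // target's representation; with undefined boolean contents only the low bit
  // is meaningful.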
2782   bool IsFlip = false;
2783   switch(TLI.getBooleanContents(VT)) {
2784     case TargetLowering::ZeroOrOneBooleanContent:
2785       IsFlip = Const->isOne();
2786       break;
2787     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2788       IsFlip = Const->isAllOnes();
2789       break;
2790     case TargetLowering::UndefinedBooleanContent:
2791       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2792       break;
2793   }
2794 
2795   if (IsFlip)
2796     return V.getOperand(0);
2797   if (Force)
2798     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2799   return SDValue();
2800 }
2801 
2802 SDValue DAGCombiner::visitADDO(SDNode *N) {
2803   SDValue N0 = N->getOperand(0);
2804   SDValue N1 = N->getOperand(1);
2805   EVT VT = N0.getValueType();
2806   bool IsSigned = (ISD::SADDO == N->getOpcode());
2807 
2808   EVT CarryVT = N->getValueType(1);
2809   SDLoc DL(N);
2810 
2811   // If the flag result is dead, turn this into an ADD.
2812   if (!N->hasAnyUseOfValue(1))
2813     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2814                      DAG.getUNDEF(CarryVT));
2815 
2816   // canonicalize constant to RHS.
2817   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2818       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2819     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2820 
2821   // fold (addo x, 0) -> x + no carry out
2822   if (isNullOrNullSplat(N1))
2823     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2824 
2825   if (!IsSigned) {
2826     // If it cannot overflow, transform into an add.
2827     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2828       return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2829                        DAG.getConstant(0, DL, CarryVT));
2830 
    // fold (uaddo (xor a, -1), 1) -> (usubo 0, a) and flip carry.
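    // (~a + 1 == 0 - a; the add overflows exactly when a == 0, which is the
    // logical inverse of the subtraction's borrow.)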
2832     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2833       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2834                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2835       return CombineTo(
2836           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2837     }
2838 
2839     if (SDValue Combined = visitUADDOLike(N0, N1, N))
2840       return Combined;
2841 
2842     if (SDValue Combined = visitUADDOLike(N1, N0, N))
2843       return Combined;
2844   }
2845 
2846   return SDValue();
2847 }
2848 
2849 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2850   EVT VT = N0.getValueType();
2851   if (VT.isVector())
2852     return SDValue();
2853 
2854   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2855   // If Y + 1 cannot overflow.
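  // (Carry is at most 1, so Y + 1 never overflowing implies the inner
  // addcarry produces no carry-out of its own.)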
2856   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2857     SDValue Y = N1.getOperand(0);
2858     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2859     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2860       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2861                          N1.getOperand(2));
2862   }
2863 
2864   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2865   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2866     if (SDValue Carry = getAsCarry(TLI, N1))
2867       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2868                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2869 
2870   return SDValue();
2871 }
2872 
2873 SDValue DAGCombiner::visitADDE(SDNode *N) {
2874   SDValue N0 = N->getOperand(0);
2875   SDValue N1 = N->getOperand(1);
2876   SDValue CarryIn = N->getOperand(2);
2877 
2878   // canonicalize constant to RHS
2879   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2880   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2881   if (N0C && !N1C)
2882     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2883                        N1, N0, CarryIn);
2884 
2885   // fold (adde x, y, false) -> (addc x, y)
2886   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2887     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2888 
2889   return SDValue();
2890 }
2891 
2892 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2893   SDValue N0 = N->getOperand(0);
2894   SDValue N1 = N->getOperand(1);
2895   SDValue CarryIn = N->getOperand(2);
2896   SDLoc DL(N);
2897 
2898   // canonicalize constant to RHS
2899   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2900   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2901   if (N0C && !N1C)
2902     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2903 
2904   // fold (addcarry x, y, false) -> (uaddo x, y)
2905   if (isNullConstant(CarryIn)) {
2906     if (!LegalOperations ||
2907         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2908       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2909   }
2910 
2911   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
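  // (0 + 0 + CarryIn can never produce a carry-out, and the sum is simply the
  // carry bit itself.)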
2912   if (isNullConstant(N0) && isNullConstant(N1)) {
2913     EVT VT = N0.getValueType();
2914     EVT CarryVT = CarryIn.getValueType();
2915     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2916     AddToWorklist(CarryExt.getNode());
2917     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2918                                     DAG.getConstant(1, DL, VT)),
2919                      DAG.getConstant(0, DL, CarryVT));
2920   }
2921 
2922   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2923     return Combined;
2924 
2925   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2926     return Combined;
2927 
2928   return SDValue();
2929 }
2930 
2931 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2932   SDValue N0 = N->getOperand(0);
2933   SDValue N1 = N->getOperand(1);
2934   SDValue CarryIn = N->getOperand(2);
2935   SDLoc DL(N);
2936 
2937   // canonicalize constant to RHS
2938   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2939   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2940   if (N0C && !N1C)
2941     return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2942 
2943   // fold (saddo_carry x, y, false) -> (saddo x, y)
2944   if (isNullConstant(CarryIn)) {
2945     if (!LegalOperations ||
2946         TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2947       return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2948   }
2949 
2950   return SDValue();
2951 }
2952 
2953 /**
 * If we are facing some sort of diamond carry propagation pattern try to
2955  * break it up to generate something like:
2956  *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2957  *
 * The end result is usually an increase in the number of operations required,
 * but because the carry is now linearized, other transforms can kick in and
 * optimize the DAG.
2960  *
2961  * Patterns typically look something like
2962  *            (uaddo A, B)
2963  *             /       \
2964  *          Carry      Sum
2965  *            |          \
2966  *            | (addcarry *, 0, Z)
2967  *            |       /
2968  *             \   Carry
2969  *              |   /
2970  * (addcarry X, *, *)
2971  *
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
2973  * produce a combine with a single path for carry propagation.
2974  */
2975 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2976                                       SDValue X, SDValue Carry0, SDValue Carry1,
2977                                       SDNode *N) {
2978   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2979     return SDValue();
2980   if (Carry1.getOpcode() != ISD::UADDO)
2981     return SDValue();
2982 
2983   SDValue Z;
2984 
2985   /**
2986    * First look for a suitable Z. It will present itself in the form of
2987    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2988    */
2989   if (Carry0.getOpcode() == ISD::ADDCARRY &&
2990       isNullConstant(Carry0.getOperand(1))) {
2991     Z = Carry0.getOperand(2);
2992   } else if (Carry0.getOpcode() == ISD::UADDO &&
2993              isOneConstant(Carry0.getOperand(1))) {
2994     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2995     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2996   } else {
2997     // We couldn't find a suitable Z.
2998     return SDValue();
2999   }
3000 
3001 
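  // cancelDiamond rewires the diamond: it recomputes A + B with carry-in Z
  // and feeds the resulting carry into the outer addcarry of X.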
  auto cancelDiamond = [&](SDValue A, SDValue B) {
3003     SDLoc DL(N);
3004     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3005     Combiner.AddToWorklist(NewY.getNode());
3006     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3007                        DAG.getConstant(0, DL, X.getValueType()),
3008                        NewY.getValue(1));
3009   };
3010 
3011   /**
3012    *      (uaddo A, B)
3013    *           |
3014    *          Sum
3015    *           |
3016    * (addcarry *, 0, Z)
3017    */
3018   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3019     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3020   }
3021 
3022   /**
3023    * (addcarry A, 0, Z)
3024    *         |
3025    *        Sum
3026    *         |
3027    *  (uaddo *, B)
3028    */
3029   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3030     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3031   }
3032 
3033   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3034     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3035   }
3036 
3037   return SDValue();
3038 }
3039 
3040 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3041 // match patterns like:
3042 //
3043 //          (uaddo A, B)            CarryIn
3044 //            |  \                     |
3045 //            |   \                    |
3046 //    PartialSum   PartialCarryOutX   /
3047 //            |        |             /
3048 //            |    ____|____________/
3049 //            |   /    |
3050 //     (uaddo *, *)    \________
3051 //       |  \                   \
3052 //       |   \                   |
3053 //       |    PartialCarryOutY   |
3054 //       |        \              |
3055 //       |         \            /
3056 //   AddCarrySum    |    ______/
3057 //                  |   /
3058 //   CarryOut = (or *, *)
3059 //
3060 // And generate ADDCARRY (or SUBCARRY) with two result values:
3061 //
3062 //    {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3063 //
3064 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3065 // a single path for carry/borrow out propagation:
3066 static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3067                                    const TargetLowering &TLI, SDValue Carry0,
3068                                    SDValue Carry1, SDNode *N) {
3069   if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3070     return SDValue();
3071   unsigned Opcode = Carry0.getOpcode();
3072   if (Opcode != Carry1.getOpcode())
3073     return SDValue();
3074   if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3075     return SDValue();
3076 
3077   // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3078   // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3079   // the above ASCII art.)
3080   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3081       Carry1.getOperand(1) != Carry0.getValue(0))
3082     std::swap(Carry0, Carry1);
3083   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3084       Carry1.getOperand(1) != Carry0.getValue(0))
3085     return SDValue();
3086 
  // The carry in value must be on the right-hand side for subtraction.
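  // (Subtraction is not commutative, so a borrow-in can only be folded when
  // it is the value being subtracted.)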
3088   unsigned CarryInOperandNum =
3089       Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3090   if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3091     return SDValue();
3092   SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3093 
3094   unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3095   if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3096     return SDValue();
3097 
3098   // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3099   // TODO: make getAsCarry() aware of how partial carries are merged.
3100   if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3101     return SDValue();
3102   CarryIn = CarryIn.getOperand(0);
3103   if (CarryIn.getValueType() != MVT::i1)
3104     return SDValue();
3105 
3106   SDLoc DL(N);
3107   SDValue Merged =
3108       DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3109                   Carry0.getOperand(1), CarryIn);
3110 
  // Note that because the result of the UADDO/USUBO of A and B feeds into the
  // UADDO/USUBO that takes the carry/borrow in, if the first UADDO/USUBO
  // overflows, the second one cannot. For example, consider 8-bit numbers
  // where 0xFF is the maximum value.
3116   //
3117   //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3118   //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3119   //
3120   // This is important because it means that OR and XOR can be used to merge
3121   // carry flags; and that AND can return a constant zero.
3122   //
3123   // TODO: match other operations that can merge flags (ADD, etc)
3124   DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3125   if (N->getOpcode() == ISD::AND)
3126     return DAG.getConstant(0, DL, MVT::i1);
3127   return Merged.getValue(1);
3128 }
3129 
3130 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3131                                        SDNode *N) {
3132   // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
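  // (~a + b + c == b - a - (1 - c), so the borrow-in is the inverted carry-in
  // and the borrow-out is the inverted carry-out.)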
3133   if (isBitwiseNot(N0))
3134     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3135       SDLoc DL(N);
3136       SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3137                                 N0.getOperand(0), NotC);
3138       return CombineTo(
3139           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3140     }
3141 
  // If the flag result is dead:
3143   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  // Don't do this if the Carry comes from the uaddo; it would neither remove
  // the uaddo nor the dependency between the instructions.
3146   if ((N0.getOpcode() == ISD::ADD ||
3147        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3148         N0.getValue(1) != CarryIn)) &&
3149       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3150     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3151                        N0.getOperand(0), N0.getOperand(1), CarryIn);
3152 
3153   /**
   * When one of the addcarry arguments is itself a carry, we may be facing
   * a diamond carry propagation, in which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
3157    */
3158   if (auto Y = getAsCarry(TLI, N1)) {
3159     // Because both are carries, Y and Z can be swapped.
3160     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3161       return R;
3162     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3163       return R;
3164   }
3165 
3166   return SDValue();
3167 }
3168 
3169 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3170 // clamp/truncation if necessary.
3171 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3172                                    SDValue RHS, SelectionDAG &DAG,
3173                                    const SDLoc &DL) {
3174   assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3175          "Illegal truncation");
3176 
3177   if (DstVT == SrcVT)
3178     return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3179 
3180   // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3181   // clamping RHS.
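  // (Since the LHS fits in DstVT and usubsat never exceeds its LHS, the
  // result fits in DstVT as well; clamping the RHS to DstVT's maximum does
  // not change the result because any larger RHS already yields zero.)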
3182   APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3183                                           DstVT.getScalarSizeInBits());
3184   if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3185     return SDValue();
3186 
3187   SDValue SatLimit =
3188       DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3189                                            DstVT.getScalarSizeInBits()),
3190                       DL, SrcVT);
3191   RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3192   RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3193   LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3194   return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3195 }
3196 
3197 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3198 // usubsat(a,b), optionally as a truncated type.
3199 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3200   if (N->getOpcode() != ISD::SUB ||
3201       !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3202     return SDValue();
3203 
3204   EVT SubVT = N->getValueType(0);
3205   SDValue Op0 = N->getOperand(0);
3206   SDValue Op1 = N->getOperand(1);
3207 
3208   // Try to find umax(a,b) - b or a - umin(a,b) patterns
  // that may be converted to usubsat(a,b).
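  // (umax(a,b) - b is 0 when a <= b and a - b otherwise, which is exactly
  // usubsat(a,b); the same holds for a - umin(a,b).)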
3210   if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3211     SDValue MaxLHS = Op0.getOperand(0);
3212     SDValue MaxRHS = Op0.getOperand(1);
3213     if (MaxLHS == Op1)
3214       return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3215     if (MaxRHS == Op1)
3216       return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3217   }
3218 
3219   if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3220     SDValue MinLHS = Op1.getOperand(0);
3221     SDValue MinRHS = Op1.getOperand(1);
3222     if (MinLHS == Op0)
3223       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3224     if (MinRHS == Op0)
3225       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3226   }
3227 
3228   // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3229   if (Op1.getOpcode() == ISD::TRUNCATE &&
3230       Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3231       Op1.getOperand(0).hasOneUse()) {
3232     SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3233     SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3234     if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3235       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3236                                  DAG, SDLoc(N));
3237     if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3238       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3239                                  DAG, SDLoc(N));
3240   }
3241 
3242   return SDValue();
3243 }
3244 
// Since it may not be valid to emit a fold to zero for vector initializers,
// check if we can before folding.
3247 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3248                              SelectionDAG &DAG, bool LegalOperations) {
3249   if (!VT.isVector())
3250     return DAG.getConstant(0, DL, VT);
3251   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3252     return DAG.getConstant(0, DL, VT);
3253   return SDValue();
3254 }
3255 
3256 SDValue DAGCombiner::visitSUB(SDNode *N) {
3257   SDValue N0 = N->getOperand(0);
3258   SDValue N1 = N->getOperand(1);
3259   EVT VT = N0.getValueType();
3260   SDLoc DL(N);
3261 
3262   // fold vector ops
3263   if (VT.isVector()) {
3264     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3265       return FoldedVOp;
3266 
3267     // fold (sub x, 0) -> x, vector edition
3268     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3269       return N0;
3270   }
3271 
3272   // fold (sub x, x) -> 0
3273   // FIXME: Refactor this and xor and other similar operations together.
3274   if (N0 == N1)
3275     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3276 
3277   // fold (sub c1, c2) -> c3
3278   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3279     return C;
3280 
3281   if (SDValue NewSel = foldBinOpIntoSelect(N))
3282     return NewSel;
3283 
3284   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3285 
3286   // fold (sub x, c) -> (add x, -c)
3287   if (N1C) {
3288     return DAG.getNode(ISD::ADD, DL, VT, N0,
3289                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3290   }
3291 
3292   if (isNullOrNullSplat(N0)) {
3293     unsigned BitWidth = VT.getScalarSizeInBits();
3294     // Right-shifting everything out but the sign bit followed by negation is
3295     // the same as flipping arithmetic/logical shift type without the negation:
3296     // -(X >>u 31) -> (X >>s 31)
3297     // -(X >>s 31) -> (X >>u 31)
3298     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3299       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3300       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3301         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3302         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3303           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3304       }
3305     }
3306 
3307     // 0 - X --> 0 if the sub is NUW.
3308     if (N->getFlags().hasNoUnsignedWrap())
3309       return N0;
3310 
3311     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3312       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3313       // N1 must be 0 because negating the minimum signed value is undefined.
3314       if (N->getFlags().hasNoSignedWrap())
3315         return N0;
3316 
3317       // 0 - X --> X if X is 0 or the minimum signed value.
3318       return N1;
3319     }
3320 
3321     // Convert 0 - abs(x).
3322     SDValue Result;
3323     if (N1->getOpcode() == ISD::ABS &&
3324         !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
3325         TLI.expandABS(N1.getNode(), Result, DAG, true))
3326       return Result;
3327 
    // Fold neg(splat(neg(x))) -> splat(x)
3329     if (VT.isVector()) {
3330       SDValue N1S = DAG.getSplatValue(N1, true);
3331       if (N1S && N1S.getOpcode() == ISD::SUB &&
3332           isNullConstant(N1S.getOperand(0))) {
3333         if (VT.isScalableVector())
3334           return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3335         return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3336       }
3337     }
3338   }
3339 
3340   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3341   if (isAllOnesOrAllOnesSplat(N0))
3342     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3343 
3344   // fold (A - (0-B)) -> A+B
3345   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3346     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3347 
3348   // fold A-(A-B) -> B
3349   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3350     return N1.getOperand(1);
3351 
3352   // fold (A+B)-A -> B
3353   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3354     return N0.getOperand(1);
3355 
3356   // fold (A+B)-B -> A
3357   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3358     return N0.getOperand(0);
3359 
3360   // fold (A+C1)-C2 -> A+(C1-C2)
3361   if (N0.getOpcode() == ISD::ADD &&
3362       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3363       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3364     SDValue NewC =
3365         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3366     assert(NewC && "Constant folding failed");
3367     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3368   }
3369 
3370   // fold C2-(A+C1) -> (C2-C1)-A
3371   if (N1.getOpcode() == ISD::ADD) {
3372     SDValue N11 = N1.getOperand(1);
3373     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3374         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3375       SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3376       assert(NewC && "Constant folding failed");
3377       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3378     }
3379   }
3380 
3381   // fold (A-C1)-C2 -> A-(C1+C2)
3382   if (N0.getOpcode() == ISD::SUB &&
3383       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3384       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3385     SDValue NewC =
3386         DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3387     assert(NewC && "Constant folding failed");
3388     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3389   }
3390 
3391   // fold (c1-A)-c2 -> (c1-c2)-A
3392   if (N0.getOpcode() == ISD::SUB &&
3393       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3394       isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3395     SDValue NewC =
3396         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3397     assert(NewC && "Constant folding failed");
3398     return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3399   }
3400 
3401   // fold ((A+(B+or-C))-B) -> A+or-C
3402   if (N0.getOpcode() == ISD::ADD &&
3403       (N0.getOperand(1).getOpcode() == ISD::SUB ||
3404        N0.getOperand(1).getOpcode() == ISD::ADD) &&
3405       N0.getOperand(1).getOperand(0) == N1)
3406     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3407                        N0.getOperand(1).getOperand(1));
3408 
3409   // fold ((A+(C+B))-B) -> A+C
3410   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3411       N0.getOperand(1).getOperand(1) == N1)
3412     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3413                        N0.getOperand(1).getOperand(0));
3414 
3415   // fold ((A-(B-C))-C) -> A-B
3416   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3417       N0.getOperand(1).getOperand(1) == N1)
3418     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3419                        N0.getOperand(1).getOperand(0));
3420 
3421   // fold (A-(B-C)) -> A+(C-B)
3422   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3423     return DAG.getNode(ISD::ADD, DL, VT, N0,
3424                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3425                                    N1.getOperand(0)));
3426 
3427   // A - (A & B)  ->  A & (~B)
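  // ((A & B) only contains bits that are already set in A, so the subtraction
  // clears exactly those bits without borrowing.)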
3428   if (N1.getOpcode() == ISD::AND) {
3429     SDValue A = N1.getOperand(0);
3430     SDValue B = N1.getOperand(1);
3431     if (A != N0)
3432       std::swap(A, B);
3433     if (A == N0 &&
3434         (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3435       SDValue InvB =
3436           DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3437       return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3438     }
3439   }
3440 
3441   // fold (X - (-Y * Z)) -> (X + (Y * Z))
3442   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3443     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3444         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3445       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3446                                 N1.getOperand(0).getOperand(1),
3447                                 N1.getOperand(1));
3448       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3449     }
3450     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3451         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3452       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3453                                 N1.getOperand(0),
3454                                 N1.getOperand(1).getOperand(1));
3455       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3456     }
3457   }
3458 
3459   // If either operand of a sub is undef, the result is undef
3460   if (N0.isUndef())
3461     return N0;
3462   if (N1.isUndef())
3463     return N1;
3464 
3465   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3466     return V;
3467 
3468   if (SDValue V = foldAddSubOfSignBit(N, DAG))
3469     return V;
3470 
3471   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3472     return V;
3473 
3474   if (SDValue V = foldSubToUSubSat(VT, N))
3475     return V;
3476 
3477   // (x - y) - 1  ->  add (xor y, -1), x
3478   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3479     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3480                               DAG.getAllOnesConstant(DL, VT));
3481     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3482   }
3483 
3484   // Look for:
3485   //   sub y, (xor x, -1)
3486   // And if the target does not like this form then turn into:
3487   //   add (add x, y), 1
3488   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3489     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3490     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3491   }
3492 
3493   // Hoist one-use addition by non-opaque constant:
3494   //   (x + C) - y  ->  (x - y) + C
3495   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3496       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3497     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3498     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3499   }
3500   // y - (x + C)  ->  (y - x) - C
3501   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3502       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3503     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3504     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3505   }
3506   // (x - C) - y  ->  (x - y) - C
3507   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3508   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3509       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3510     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3511     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3512   }
3513   // (C - x) - y  ->  C - (x + y)
3514   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3515       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3516     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3517     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3518   }
3519 
3520   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3521   // rather than 'sub 0/1' (the sext should get folded).
3522   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3523   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3524       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3525       TLI.getBooleanContents(VT) ==
3526           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3527     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3528     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3529   }
3530 
3531   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
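  // (Y is the sign mask of X, either 0 or -1; xor-then-subtract is the
  // classic branchless absolute-value idiom.)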
3532   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3533     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3534       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3535       SDValue S0 = N1.getOperand(0);
3536       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3537         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3538           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3539             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3540     }
3541   }
3542 
3543   // If the relocation model supports it, consider symbol offsets.
3544   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3545     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3546       // fold (sub Sym, c) -> Sym-c
3547       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3548         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3549                                     GA->getOffset() -
3550                                         (uint64_t)N1C->getSExtValue());
3551       // fold (sub Sym+c1, Sym+c2) -> c1-c2
3552       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3553         if (GA->getGlobal() == GB->getGlobal())
3554           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3555                                  DL, VT);
3556     }
3557 
3558   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3559   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3560     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3561     if (TN->getVT() == MVT::i1) {
3562       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3563                                  DAG.getConstant(1, DL, VT));
3564       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3565     }
3566   }
3567 
3568   // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3569   if (N1.getOpcode() == ISD::VSCALE) {
3570     const APInt &IntVal = N1.getConstantOperandAPInt(0);
3571     return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3572   }
3573 
3574   // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3575   if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3576     APInt NewStep = -N1.getConstantOperandAPInt(0);
3577     return DAG.getNode(ISD::ADD, DL, VT, N0,
3578                        DAG.getStepVector(DL, VT, NewStep));
3579   }
3580 
3581   // Prefer an add for more folding potential and possibly better codegen:
3582   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
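  // ((lshr N10, width-1) is 0 or 1 while (ashr N10, width-1) is 0 or -1, so
  // subtracting the former equals adding the latter.)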
3583   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3584     SDValue ShAmt = N1.getOperand(1);
3585     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3586     if (ShAmtC &&
3587         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3588       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3589       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3590     }
3591   }
3592 
3593   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3594     // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
3595     if (SDValue Carry = getAsCarry(TLI, N0)) {
3596       SDValue X = N1;
3597       SDValue Zero = DAG.getConstant(0, DL, VT);
3598       SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3599       return DAG.getNode(ISD::ADDCARRY, DL,
3600                          DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3601                          Carry);
3602     }
3603   }
3604 
3605   return SDValue();
3606 }
3607 
3608 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3609   SDValue N0 = N->getOperand(0);
3610   SDValue N1 = N->getOperand(1);
3611   EVT VT = N0.getValueType();
3612   SDLoc DL(N);
3613 
3614   // fold vector ops
3615   if (VT.isVector()) {
    // TODO: SimplifyVBinOp
3617 
3618     // fold (sub_sat x, 0) -> x, vector edition
3619     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3620       return N0;
3621   }
3622 
3623   // fold (sub_sat x, undef) -> 0
3624   if (N0.isUndef() || N1.isUndef())
3625     return DAG.getConstant(0, DL, VT);
3626 
3627   // fold (sub_sat x, x) -> 0
3628   if (N0 == N1)
3629     return DAG.getConstant(0, DL, VT);
3630 
3631   // fold (sub_sat c1, c2) -> c3
3632   if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3633     return C;
3634 
3635   // fold (sub_sat x, 0) -> x
3636   if (isNullConstant(N1))
3637     return N0;
3638 
3639   return SDValue();
3640 }
3641 
3642 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3643   SDValue N0 = N->getOperand(0);
3644   SDValue N1 = N->getOperand(1);
3645   EVT VT = N0.getValueType();
3646   SDLoc DL(N);
3647 
  // If the flag result is dead, turn this into a SUB.
3649   if (!N->hasAnyUseOfValue(1))
3650     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3651                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3652 
3653   // fold (subc x, x) -> 0 + no borrow
3654   if (N0 == N1)
3655     return CombineTo(N, DAG.getConstant(0, DL, VT),
3656                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3657 
3658   // fold (subc x, 0) -> x + no borrow
3659   if (isNullConstant(N1))
3660     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3661 
3662   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3663   if (isAllOnesConstant(N0))
3664     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3665                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3666 
3667   return SDValue();
3668 }
3669 
3670 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3671   SDValue N0 = N->getOperand(0);
3672   SDValue N1 = N->getOperand(1);
3673   EVT VT = N0.getValueType();
3674   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3675 
3676   EVT CarryVT = N->getValueType(1);
3677   SDLoc DL(N);
3678 
  // If the flag result is dead, turn this into a SUB.
3680   if (!N->hasAnyUseOfValue(1))
3681     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3682                      DAG.getUNDEF(CarryVT));
3683 
3684   // fold (subo x, x) -> 0 + no borrow
3685   if (N0 == N1)
3686     return CombineTo(N, DAG.getConstant(0, DL, VT),
3687                      DAG.getConstant(0, DL, CarryVT));
3688 
3689   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3690 
  // fold (ssubo x, c) -> (saddo x, -c)
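  // (Negating the minimum signed value would itself overflow, so that case is
  // excluded.)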
3692   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3693     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3694                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3695   }
3696 
3697   // fold (subo x, 0) -> x + no borrow
3698   if (isNullOrNullSplat(N1))
3699     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3700 
3701   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3702   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3703     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3704                      DAG.getConstant(0, DL, CarryVT));
3705 
3706   return SDValue();
3707 }
3708 
3709 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3710   SDValue N0 = N->getOperand(0);
3711   SDValue N1 = N->getOperand(1);
3712   SDValue CarryIn = N->getOperand(2);
3713 
3714   // fold (sube x, y, false) -> (subc x, y)
3715   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3716     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3717 
3718   return SDValue();
3719 }
3720 
3721 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3722   SDValue N0 = N->getOperand(0);
3723   SDValue N1 = N->getOperand(1);
3724   SDValue CarryIn = N->getOperand(2);
3725 
3726   // fold (subcarry x, y, false) -> (usubo x, y)
3727   if (isNullConstant(CarryIn)) {
3728     if (!LegalOperations ||
3729         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3730       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3731   }
3732 
3733   return SDValue();
3734 }
3735 
3736 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3737   SDValue N0 = N->getOperand(0);
3738   SDValue N1 = N->getOperand(1);
3739   SDValue CarryIn = N->getOperand(2);
3740 
3741   // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3742   if (isNullConstant(CarryIn)) {
3743     if (!LegalOperations ||
3744         TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3745       return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3746   }
3747 
3748   return SDValue();
3749 }
3750 
// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX, and
3752 // UMULFIXSAT here.
3753 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3754   SDValue N0 = N->getOperand(0);
3755   SDValue N1 = N->getOperand(1);
3756   SDValue Scale = N->getOperand(2);
3757   EVT VT = N0.getValueType();
3758 
3759   // fold (mulfix x, undef, scale) -> 0
3760   if (N0.isUndef() || N1.isUndef())
3761     return DAG.getConstant(0, SDLoc(N), VT);
3762 
3763   // Canonicalize constant to RHS (vector doesn't have to splat)
3764   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3765      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3766     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3767 
3768   // fold (mulfix x, 0, scale) -> 0
3769   if (isNullConstant(N1))
3770     return DAG.getConstant(0, SDLoc(N), VT);
3771 
3772   return SDValue();
3773 }
3774 
3775 SDValue DAGCombiner::visitMUL(SDNode *N) {
3776   SDValue N0 = N->getOperand(0);
3777   SDValue N1 = N->getOperand(1);
3778   EVT VT = N0.getValueType();
3779 
3780   // fold (mul x, undef) -> 0
3781   if (N0.isUndef() || N1.isUndef())
3782     return DAG.getConstant(0, SDLoc(N), VT);
3783 
3784   bool N1IsConst = false;
3785   bool N1IsOpaqueConst = false;
3786   APInt ConstValue1;
3787 
3788   // fold vector ops
3789   if (VT.isVector()) {
3790     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3791       return FoldedVOp;
3792 
3793     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3794     assert((!N1IsConst ||
3795             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3796            "Splat APInt should be element width");
3797   } else {
3798     N1IsConst = isa<ConstantSDNode>(N1);
3799     if (N1IsConst) {
3800       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3801       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3802     }
3803   }
3804 
3805   // fold (mul c1, c2) -> c1*c2
3806   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3807     return C;
3808 
3809   // canonicalize constant to RHS (vector doesn't have to splat)
3810   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3811      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3812     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3813 
3814   // fold (mul x, 0) -> 0
3815   if (N1IsConst && ConstValue1.isZero())
3816     return N1;
3817 
3818   // fold (mul x, 1) -> x
3819   if (N1IsConst && ConstValue1.isOne())
3820     return N0;
3821 
3822   if (SDValue NewSel = foldBinOpIntoSelect(N))
3823     return NewSel;
3824 
3825   // fold (mul x, -1) -> 0-x
3826   if (N1IsConst && ConstValue1.isAllOnes()) {
3827     SDLoc DL(N);
3828     return DAG.getNode(ISD::SUB, DL, VT,
3829                        DAG.getConstant(0, DL, VT), N0);
3830   }
3831 
3832   // fold (mul x, (1 << c)) -> x << c
3833   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3834       DAG.isKnownToBeAPowerOfTwo(N1) &&
3835       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3836     SDLoc DL(N);
3837     SDValue LogBase2 = BuildLogBase2(N1, DL);
3838     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3839     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3840     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3841   }
3842 
3843   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3844   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3845     unsigned Log2Val = (-ConstValue1).logBase2();
3846     SDLoc DL(N);
3847     // FIXME: If the input is something that is easily negated (e.g. a
3848     // single-use add), we should put the negate there.
3849     return DAG.getNode(ISD::SUB, DL, VT,
3850                        DAG.getConstant(0, DL, VT),
3851                        DAG.getNode(ISD::SHL, DL, VT, N0,
3852                             DAG.getConstant(Log2Val, DL,
3853                                       getShiftAmountTy(N0.getValueType()))));
3854   }
3855 
3856   // Try to transform:
3857   // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3858   // mul x, (2^N + 1) --> add (shl x, N), x
3859   // mul x, (2^N - 1) --> sub (shl x, N), x
3860   // Examples: x * 33 --> (x << 5) + x
3861   //           x * 15 --> (x << 4) - x
3862   //           x * -33 --> -((x << 5) + x)
3863   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3864   // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3865   // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3866   // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3867   // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3868   //           x * 0xf800 --> (x << 16) - (x << 11)
3869   //           x * -0x8800 --> -((x << 15) + (x << 11))
3870   //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3871   if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3872     // TODO: We could handle more general decomposition of any constant by
3873     //       having the target set a limit on number of ops and making a
3874     //       callback to determine that sequence (similar to sqrt expansion).
3875     unsigned MathOp = ISD::DELETED_NODE;
3876     APInt MulC = ConstValue1.abs();
3877     // The constant `2` should be treated as (2^0 + 1).
3878     unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3879     MulC.lshrInPlace(TZeros);
3880     if ((MulC - 1).isPowerOf2())
3881       MathOp = ISD::ADD;
3882     else if ((MulC + 1).isPowerOf2())
3883       MathOp = ISD::SUB;
3884 
3885     if (MathOp != ISD::DELETED_NODE) {
3886       unsigned ShAmt =
3887           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3888       ShAmt += TZeros;
3889       assert(ShAmt < VT.getScalarSizeInBits() &&
3890              "multiply-by-constant generated out of bounds shift");
3891       SDLoc DL(N);
3892       SDValue Shl =
3893           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3894       SDValue R =
3895           TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3896                                DAG.getNode(ISD::SHL, DL, VT, N0,
3897                                            DAG.getConstant(TZeros, DL, VT)))
3898                  : DAG.getNode(MathOp, DL, VT, Shl, N0);
3899       if (ConstValue1.isNegative())
3900         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3901       return R;
3902     }
3903   }
3904 
3905   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3906   if (N0.getOpcode() == ISD::SHL &&
3907       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3908       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3909     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3910     if (isConstantOrConstantVector(C3))
3911       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3912   }
3913 
3914   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3915   // use.
3916   {
3917     SDValue Sh(nullptr, 0), Y(nullptr, 0);
3918 
3919     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3920     if (N0.getOpcode() == ISD::SHL &&
3921         isConstantOrConstantVector(N0.getOperand(1)) &&
3922         N0.getNode()->hasOneUse()) {
3923       Sh = N0; Y = N1;
3924     } else if (N1.getOpcode() == ISD::SHL &&
3925                isConstantOrConstantVector(N1.getOperand(1)) &&
3926                N1.getNode()->hasOneUse()) {
3927       Sh = N1; Y = N0;
3928     }
3929 
3930     if (Sh.getNode()) {
3931       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3932       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3933     }
3934   }
3935 
3936   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3937   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3938       N0.getOpcode() == ISD::ADD &&
3939       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3940       isMulAddWithConstProfitable(N, N0, N1))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));
3946 
3947   // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3948   if (N0.getOpcode() == ISD::VSCALE)
3949     if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3950       const APInt &C0 = N0.getConstantOperandAPInt(0);
3951       const APInt &C1 = NC1->getAPIntValue();
3952       return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3953     }
3954 
3955   // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
3956   APInt MulVal;
3957   if (N0.getOpcode() == ISD::STEP_VECTOR)
3958     if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
3959       const APInt &C0 = N0.getConstantOperandAPInt(0);
3960       APInt NewStep = C0 * MulVal;
3961       return DAG.getStepVector(SDLoc(N), VT, NewStep);
3962     }
3963 
  // Fold ((mul x, 0/undef) -> 0,
  //       (mul x, 1) -> x)
  // -> and(x, mask)
3967   // We can replace vectors with '0' and '1' factors with a clearing mask.
3968   if (VT.isFixedLengthVector()) {
3969     unsigned NumElts = VT.getVectorNumElements();
3970     SmallBitVector ClearMask;
3971     ClearMask.reserve(NumElts);
3972     auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3973       if (!V || V->isZero()) {
3974         ClearMask.push_back(true);
3975         return true;
3976       }
3977       ClearMask.push_back(false);
3978       return V->isOne();
3979     };
3980     if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3981         ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
3982       assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
3983       SDLoc DL(N);
3984       EVT LegalSVT = N1.getOperand(0).getValueType();
3985       SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3986       SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
3987       SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3988       for (unsigned I = 0; I != NumElts; ++I)
3989         if (ClearMask[I])
3990           Mask[I] = Zero;
3991       return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
3992     }
3993   }
3994 
3995   // reassociate mul
3996   if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3997     return RMUL;
3998 
3999   return SDValue();
4000 }
4001 
4002 /// Return true if divmod libcall is available.
4003 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4004                                      const TargetLowering &TLI) {
4005   RTLIB::Libcall LC;
4006   EVT NodeType = Node->getValueType(0);
4007   if (!NodeType.isSimple())
4008     return false;
4009   switch (NodeType.getSimpleVT().SimpleTy) {
4010   default: return false; // No libcall for vector types.
4011   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
4012   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4013   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4014   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4015   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4016   }
4017 
4018   return TLI.getLibcallName(LC) != nullptr;
4019 }
4020 
4021 /// Issue divrem if both quotient and remainder are needed.
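/// E.g. if both (sdiv x, y) and (srem x, y) exist, emit a single
/// (sdivrem x, y) and rewire both of them to its two result values.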
4022 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4023   if (Node->use_empty())
4024     return SDValue(); // This is a dead node, leave it alone.
4025 
4026   unsigned Opcode = Node->getOpcode();
4027   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4028   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4029 
  // Divrem libcalls can still handle non-legal types when lowering via
  // libcalls, so only reject vector and non-integer types here.
4031   EVT VT = Node->getValueType(0);
4032   if (VT.isVector() || !VT.isInteger())
4033     return SDValue();
4034 
4035   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4036     return SDValue();
4037 
4038   // If DIVREM is going to get expanded into a libcall,
4039   // but there is no libcall available, then don't combine.
4040   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4041       !isDivRemLibcallAvailable(Node, isSigned, TLI))
4042     return SDValue();
4043 
  // If div is legal, it's better to do the normal expansion.
4045   unsigned OtherOpcode = 0;
4046   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4047     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4048     if (TLI.isOperationLegalOrCustom(Opcode, VT))
4049       return SDValue();
4050   } else {
4051     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4052     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4053       return SDValue();
4054   }
4055 
4056   SDValue Op0 = Node->getOperand(0);
4057   SDValue Op1 = Node->getOperand(1);
4058   SDValue combined;
4059   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
4060          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
4061     SDNode *User = *UI;
4062     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4063         User->use_empty())
4064       continue;
4065     // Convert the other matching node(s), too;
4066     // otherwise, the DIVREM may get target-legalized into something
4067     // target-specific that we won't be able to recognize.
4068     unsigned UserOpc = User->getOpcode();
4069     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4070         User->getOperand(0) == Op0 &&
4071         User->getOperand(1) == Op1) {
4072       if (!combined) {
4073         if (UserOpc == OtherOpcode) {
4074           SDVTList VTs = DAG.getVTList(VT, VT);
4075           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4076         } else if (UserOpc == DivRemOpc) {
4077           combined = SDValue(User, 0);
4078         } else {
4079           assert(UserOpc == Opcode);
4080           continue;
4081         }
4082       }
4083       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4084         CombineTo(User, combined);
4085       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4086         CombineTo(User, combined.getValue(1));
4087     }
4088   }
4089   return combined;
4090 }
4091 
4092 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4093   SDValue N0 = N->getOperand(0);
4094   SDValue N1 = N->getOperand(1);
4095   EVT VT = N->getValueType(0);
4096   SDLoc DL(N);
4097 
4098   unsigned Opc = N->getOpcode();
4099   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4100   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4101 
4102   // X / undef -> undef
4103   // X % undef -> undef
4104   // X / 0 -> undef
4105   // X % 0 -> undef
4106   // NOTE: This includes vectors where any divisor element is zero/undef.
4107   if (DAG.isUndef(Opc, {N0, N1}))
4108     return DAG.getUNDEF(VT);
4109 
4110   // undef / X -> 0
4111   // undef % X -> 0
4112   if (N0.isUndef())
4113     return DAG.getConstant(0, DL, VT);
4114 
4115   // 0 / X -> 0
4116   // 0 % X -> 0
4117   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4118   if (N0C && N0C->isZero())
4119     return N0;
4120 
4121   // X / X -> 1
4122   // X % X -> 0
4123   if (N0 == N1)
4124     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4125 
4126   // X / 1 -> X
4127   // X % 1 -> 0
4128   // If this is a boolean op (single-bit element type), we can't have
4129   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4130   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4131   // it's a 1.
4132   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4133     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4134 
4135   return SDValue();
4136 }
4137 
4138 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4139   SDValue N0 = N->getOperand(0);
4140   SDValue N1 = N->getOperand(1);
4141   EVT VT = N->getValueType(0);
4142   EVT CCVT = getSetCCResultType(VT);
4143 
4144   // fold vector ops
4145   if (VT.isVector())
4146     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4147       return FoldedVOp;
4148 
4149   SDLoc DL(N);
4150 
4151   // fold (sdiv c1, c2) -> c1/c2
4152   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4153   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4154     return C;
4155 
4156   // fold (sdiv X, -1) -> 0-X
4157   if (N1C && N1C->isAllOnes())
4158     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4159 
4160   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4161   if (N1C && N1C->getAPIntValue().isMinSignedValue())
4162     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4163                          DAG.getConstant(1, DL, VT),
4164                          DAG.getConstant(0, DL, VT));
4165 
4166   if (SDValue V = simplifyDivRem(N, DAG))
4167     return V;
4168 
4169   if (SDValue NewSel = foldBinOpIntoSelect(N))
4170     return NewSel;
4171 
4172   // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> (X&15) >> 2
4174   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4175     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4176 
4177   if (SDValue V = visitSDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
4180     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4181                                               { N0, N1 })) {
4182       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4183       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4184       AddToWorklist(Mul.getNode());
4185       AddToWorklist(Sub.getNode());
4186       CombineTo(RemNode, Sub);
4187     }
4188     return V;
4189   }
4190 
4191   // sdiv, srem -> sdivrem
4192   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4193   // true.  Otherwise, we break the simplification logic in visitREM().
4194   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4195   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4196     if (SDValue DivRem = useDivRem(N))
4197         return DivRem;
4198 
4199   return SDValue();
4200 }
4201 
4202 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4203   SDLoc DL(N);
4204   EVT VT = N->getValueType(0);
4205   EVT CCVT = getSetCCResultType(VT);
4206   unsigned BitWidth = VT.getScalarSizeInBits();
4207 
  // Helper for determining whether a value is a power-of-2 constant scalar or
  // a vector of such elements.
4210   auto IsPowerOfTwo = [](ConstantSDNode *C) {
4211     if (C->isZero() || C->isOpaque())
4212       return false;
4213     if (C->getAPIntValue().isPowerOf2())
4214       return true;
4215     if ((-C->getAPIntValue()).isPowerOf2())
4216       return true;
4217     return false;
4218   };
4219 
4220   // fold (sdiv X, pow2) -> simple ops after legalize
4221   // FIXME: We check for the exact bit here because the generic lowering gives
4222   // better results in that case. The target-specific lowering should learn how
4223   // to handle exact sdivs efficiently.
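  // For illustration, (sdiv i32 x, 8) becomes
  //   (sra (add x, (srl (sra x, 31), 29)), 3),
  // i.e. 7 is added to x before the shift iff x is negative.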
4224   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4225     // Target-specific implementation of sdiv x, pow2.
4226     if (SDValue Res = BuildSDIVPow2(N))
4227       return Res;
4228 
4229     // Create constants that are functions of the shift amount value.
4230     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4231     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4232     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4233     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4234     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4235     if (!isConstantOrConstantVector(Inexact))
4236       return SDValue();
4237 
    // Splat the sign bit into the register.
4239     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4240                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4241     AddToWorklist(Sign.getNode());
4242 
    // Add (N0 < 0) ? Pow2 - 1 : 0 so the arithmetic shift rounds toward zero.
4244     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4245     AddToWorklist(Srl.getNode());
4246     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4247     AddToWorklist(Add.getNode());
4248     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4249     AddToWorklist(Sra.getNode());
4250 
4251     // Special case: (sdiv X, 1) -> X
4252     // Special Case: (sdiv X, -1) -> 0-X
4253     SDValue One = DAG.getConstant(1, DL, VT);
4254     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4255     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4256     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4257     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4258     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4259 
4260     // If dividing by a positive value, we're done. Otherwise, the result must
4261     // be negated.
4262     SDValue Zero = DAG.getConstant(0, DL, VT);
4263     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4264 
4265     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4266     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4267     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4268     return Res;
4269   }
4270 
4271   // If integer divide is expensive and we satisfy the requirements, emit an
4272   // alternate sequence.  Targets may check function attributes for size/speed
4273   // trade-offs.
4274   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4275   if (isConstantOrConstantVector(N1) &&
4276       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4277     if (SDValue Op = BuildSDIV(N))
4278       return Op;
4279 
4280   return SDValue();
4281 }
4282 
4283 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4284   SDValue N0 = N->getOperand(0);
4285   SDValue N1 = N->getOperand(1);
4286   EVT VT = N->getValueType(0);
4287   EVT CCVT = getSetCCResultType(VT);
4288 
4289   // fold vector ops
4290   if (VT.isVector())
4291     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4292       return FoldedVOp;
4293 
4294   SDLoc DL(N);
4295 
4296   // fold (udiv c1, c2) -> c1/c2
4297   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4298   if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4299     return C;
4300 
4301   // fold (udiv X, -1) -> select(X == -1, 1, 0)
4302   if (N1C && N1C->isAllOnes())
4303     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4304                          DAG.getConstant(1, DL, VT),
4305                          DAG.getConstant(0, DL, VT));
4306 
4307   if (SDValue V = simplifyDivRem(N, DAG))
4308     return V;
4309 
4310   if (SDValue NewSel = foldBinOpIntoSelect(N))
4311     return NewSel;
4312 
4313   if (SDValue V = visitUDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
4316     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4317                                               { N0, N1 })) {
4318       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4319       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4320       AddToWorklist(Mul.getNode());
4321       AddToWorklist(Sub.getNode());
4322       CombineTo(RemNode, Sub);
4323     }
4324     return V;
4325   }
4326 
  // udiv, urem -> udivrem
4328   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4329   // true.  Otherwise, we break the simplification logic in visitREM().
4330   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4331   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4332     if (SDValue DivRem = useDivRem(N))
4333         return DivRem;
4334 
4335   return SDValue();
4336 }
4337 
4338 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4339   SDLoc DL(N);
4340   EVT VT = N->getValueType(0);
4341 
4342   // fold (udiv x, (1 << c)) -> x >>u c
4343   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4344       DAG.isKnownToBeAPowerOfTwo(N1)) {
4345     SDValue LogBase2 = BuildLogBase2(N1, DL);
4346     AddToWorklist(LogBase2.getNode());
4347 
4348     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4349     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4350     AddToWorklist(Trunc.getNode());
4351     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4352   }
4353 
4354   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
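  // E.g. (udiv x, (shl 4, y)) --> (srl x, (add y, 2)).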
4355   if (N1.getOpcode() == ISD::SHL) {
4356     SDValue N10 = N1.getOperand(0);
4357     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4358         DAG.isKnownToBeAPowerOfTwo(N10)) {
4359       SDValue LogBase2 = BuildLogBase2(N10, DL);
4360       AddToWorklist(LogBase2.getNode());
4361 
4362       EVT ADDVT = N1.getOperand(1).getValueType();
4363       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4364       AddToWorklist(Trunc.getNode());
4365       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4366       AddToWorklist(Add.getNode());
4367       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4368     }
4369   }
4370 
4371   // fold (udiv x, c) -> alternate
4372   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4373   if (isConstantOrConstantVector(N1) &&
4374       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4375     if (SDValue Op = BuildUDIV(N))
4376       return Op;
4377 
4378   return SDValue();
4379 }
4380 
4381 // handles ISD::SREM and ISD::UREM
4382 SDValue DAGCombiner::visitREM(SDNode *N) {
4383   unsigned Opcode = N->getOpcode();
4384   SDValue N0 = N->getOperand(0);
4385   SDValue N1 = N->getOperand(1);
4386   EVT VT = N->getValueType(0);
4387   EVT CCVT = getSetCCResultType(VT);
4388 
4389   bool isSigned = (Opcode == ISD::SREM);
4390   SDLoc DL(N);
4391 
4392   // fold (rem c1, c2) -> c1%c2
4393   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4394   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4395     return C;
4396 
  // fold (urem X, -1) -> select(X == -1, 0, X)
4398   if (!isSigned && N1C && N1C->isAllOnes())
4399     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4400                          DAG.getConstant(0, DL, VT), N0);
4401 
4402   if (SDValue V = simplifyDivRem(N, DAG))
4403     return V;
4404 
4405   if (SDValue NewSel = foldBinOpIntoSelect(N))
4406     return NewSel;
4407 
4408   if (isSigned) {
4409     // If we know the sign bits of both operands are zero, strength reduce to a
4410     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4411     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4412       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4413   } else {
4414     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4415       // fold (urem x, pow2) -> (and x, pow2-1)
4416       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4417       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4418       AddToWorklist(Add.getNode());
4419       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4420     }
4421     if (N1.getOpcode() == ISD::SHL &&
4422         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4423       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4424       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4425       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4426       AddToWorklist(Add.getNode());
4427       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4428     }
4429   }
4430 
4431   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4432 
4433   // If X/C can be simplified by the division-by-constant logic, lower
4434   // X%C to the equivalent of X-X/C*C.
4435   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4436   // speculative DIV must not cause a DIVREM conversion.  We guard against this
4437   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
4438   // combine will not return a DIVREM.  Regardless, checking cheapness here
4439   // makes sense since the simplification results in fatter code.
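  // E.g. (urem x, 7) becomes (sub x, (mul (udiv x, 7), 7)), where the udiv is
  // in turn strength-reduced by the division-by-constant logic.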
4440   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4441     SDValue OptimizedDiv =
4442         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4443     if (OptimizedDiv.getNode()) {
4444       // If the equivalent Div node also exists, update its users.
4445       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4446       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4447                                                 { N0, N1 }))
4448         CombineTo(DivNode, OptimizedDiv);
4449       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4450       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4451       AddToWorklist(OptimizedDiv.getNode());
4452       AddToWorklist(Mul.getNode());
4453       return Sub;
4454     }
4455   }
4456 
  // sdiv/udiv, srem/urem -> sdivrem/udivrem
4458   if (SDValue DivRem = useDivRem(N))
4459     return DivRem.getValue(1);
4460 
4461   return SDValue();
4462 }
4463 
4464 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4465   SDValue N0 = N->getOperand(0);
4466   SDValue N1 = N->getOperand(1);
4467   EVT VT = N->getValueType(0);
4468   SDLoc DL(N);
4469 
4470   if (VT.isVector()) {
4471     // fold (mulhs x, 0) -> 0
    // Do not return N0/N1 itself: the all-zeros splat may contain undef
    // elements.
4473     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4474         ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4475       return DAG.getConstant(0, DL, VT);
4476   }
4477 
4478   // fold (mulhs c1, c2)
4479   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4480     return C;
4481 
4482   // canonicalize constant to RHS.
4483   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4484       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4485     return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4486 
4487   // fold (mulhs x, 0) -> 0
4488   if (isNullConstant(N1))
4489     return N1;
4490   // fold (mulhs x, 1) -> (sra x, size(x)-1)
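  // (the high half of the widened product x * 1 is just the sign bits of x)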
4491   if (isOneConstant(N1))
4492     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4493                        DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4494                                        getShiftAmountTy(N0.getValueType())));
4495 
4496   // fold (mulhs x, undef) -> 0
4497   if (N0.isUndef() || N1.isUndef())
4498     return DAG.getConstant(0, DL, VT);
4499 
4500   // If the type twice as wide is legal, transform the mulhs to a wider multiply
4501   // plus a shift.
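  // E.g. i16 mulhs with a legal i32 multiply becomes
  //   (trunc (srl (mul (sext x), (sext y)), 16)).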
4502   if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4503       !VT.isVector()) {
4504     MVT Simple = VT.getSimpleVT();
4505     unsigned SimpleSize = Simple.getSizeInBits();
4506     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4507     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4508       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4509       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4510       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4511       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4512             DAG.getConstant(SimpleSize, DL,
4513                             getShiftAmountTy(N1.getValueType())));
4514       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4515     }
4516   }
4517 
4518   return SDValue();
4519 }
4520 
4521 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4522   SDValue N0 = N->getOperand(0);
4523   SDValue N1 = N->getOperand(1);
4524   EVT VT = N->getValueType(0);
4525   SDLoc DL(N);
4526 
4527   if (VT.isVector()) {
4528     // fold (mulhu x, 0) -> 0
    // Do not return N0/N1 itself: the all-zeros splat may contain undef
    // elements.
4530     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4531         ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4532       return DAG.getConstant(0, DL, VT);
4533   }
4534 
4535   // fold (mulhu c1, c2)
4536   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4537     return C;
4538 
4539   // canonicalize constant to RHS.
4540   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4541       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4542     return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4543 
4544   // fold (mulhu x, 0) -> 0
4545   if (isNullConstant(N1))
4546     return N1;
4547   // fold (mulhu x, 1) -> 0
4548   if (isOneConstant(N1))
4549     return DAG.getConstant(0, DL, N0.getValueType());
4550   // fold (mulhu x, undef) -> 0
4551   if (N0.isUndef() || N1.isUndef())
4552     return DAG.getConstant(0, DL, VT);
4553 
4554   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
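  // E.g. for i32: (mulhu x, 16) --> (srl x, 28).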
4555   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4556       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4557     unsigned NumEltBits = VT.getScalarSizeInBits();
4558     SDValue LogBase2 = BuildLogBase2(N1, DL);
4559     SDValue SRLAmt = DAG.getNode(
4560         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4561     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4562     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4563     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4564   }
4565 
4566   // If the type twice as wide is legal, transform the mulhu to a wider multiply
4567   // plus a shift.
4568   if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4569       !VT.isVector()) {
4570     MVT Simple = VT.getSimpleVT();
4571     unsigned SimpleSize = Simple.getSizeInBits();
4572     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4573     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4574       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4575       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4576       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4577       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4578             DAG.getConstant(SimpleSize, DL,
4579                             getShiftAmountTy(N1.getValueType())));
4580       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4581     }
4582   }
4583 
4584   // Simplify the operands using demanded-bits information.
4585   // We don't have demanded bits support for MULHU so this just enables constant
4586   // folding based on known bits.
4587   if (SimplifyDemandedBits(SDValue(N, 0)))
4588     return SDValue(N, 0);
4589 
4590   return SDValue();
4591 }
4592 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// the simplified value if a simplification was made.
4596 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4597                                                 unsigned HiOp) {
4598   // If the high half is not needed, just compute the low half.
4599   bool HiExists = N->hasAnyUseOfValue(1);
4600   if (!HiExists && (!LegalOperations ||
4601                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4602     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4603     return CombineTo(N, Res, Res);
4604   }
4605 
4606   // If the low half is not needed, just compute the high half.
4607   bool LoExists = N->hasAnyUseOfValue(0);
4608   if (!LoExists && (!LegalOperations ||
4609                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4610     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4611     return CombineTo(N, Res, Res);
4612   }
4613 
4614   // If both halves are used, return as it is.
4615   if (LoExists && HiExists)
4616     return SDValue();
4617 
4618   // If the two computed results can be simplified separately, separate them.
4619   if (LoExists) {
4620     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4621     AddToWorklist(Lo.getNode());
4622     SDValue LoOpt = combine(Lo.getNode());
4623     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4624         (!LegalOperations ||
4625          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4626       return CombineTo(N, LoOpt, LoOpt);
4627   }
4628 
4629   if (HiExists) {
4630     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4631     AddToWorklist(Hi.getNode());
4632     SDValue HiOpt = combine(Hi.getNode());
4633     if (HiOpt.getNode() && HiOpt != Hi &&
4634         (!LegalOperations ||
4635          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4636       return CombineTo(N, HiOpt, HiOpt);
4637   }
4638 
4639   return SDValue();
4640 }
4641 
4642 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4643   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4644     return Res;
4645 
4646   EVT VT = N->getValueType(0);
4647   SDLoc DL(N);
4648 
  // If the type twice as wide is legal, transform the smul_lohi to a wider
  // multiply plus a shift.
4651   if (VT.isSimple() && !VT.isVector()) {
4652     MVT Simple = VT.getSimpleVT();
4653     unsigned SimpleSize = Simple.getSizeInBits();
4654     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4655     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4656       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4657       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4658       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high half (result value 1).
4660       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4661             DAG.getConstant(SimpleSize, DL,
4662                             getShiftAmountTy(Lo.getValueType())));
4663       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low half (result value 0).
4665       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4666       return CombineTo(N, Lo, Hi);
4667     }
4668   }
4669 
4670   return SDValue();
4671 }
4672 
4673 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4674   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4675     return Res;
4676 
4677   EVT VT = N->getValueType(0);
4678   SDLoc DL(N);
4679 
4680   // (umul_lohi N0, 0) -> (0, 0)
4681   if (isNullConstant(N->getOperand(1))) {
4682     SDValue Zero = DAG.getConstant(0, DL, VT);
4683     return CombineTo(N, Zero, Zero);
4684   }
4685 
4686   // (umul_lohi N0, 1) -> (N0, 0)
4687   if (isOneConstant(N->getOperand(1))) {
4688     SDValue Zero = DAG.getConstant(0, DL, VT);
4689     return CombineTo(N, N->getOperand(0), Zero);
4690   }
4691 
  // If the type twice as wide is legal, transform the umul_lohi to a wider
  // multiply plus a shift.
4694   if (VT.isSimple() && !VT.isVector()) {
4695     MVT Simple = VT.getSimpleVT();
4696     unsigned SimpleSize = Simple.getSizeInBits();
4697     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4698     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4699       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4700       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4701       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high half (result value 1).
4703       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4704             DAG.getConstant(SimpleSize, DL,
4705                             getShiftAmountTy(Lo.getValueType())));
4706       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low half (result value 0).
4708       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4709       return CombineTo(N, Lo, Hi);
4710     }
4711   }
4712 
4713   return SDValue();
4714 }
4715 
4716 SDValue DAGCombiner::visitMULO(SDNode *N) {
4717   SDValue N0 = N->getOperand(0);
4718   SDValue N1 = N->getOperand(1);
4719   EVT VT = N0.getValueType();
4720   bool IsSigned = (ISD::SMULO == N->getOpcode());
4721 
4722   EVT CarryVT = N->getValueType(1);
4723   SDLoc DL(N);
4724 
4725   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4726   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4727 
4728   // fold operation with constant operands.
4729   // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4730   // multiple results.
4731   if (N0C && N1C) {
4732     bool Overflow;
4733     APInt Result =
4734         IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4735                  : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4736     return CombineTo(N, DAG.getConstant(Result, DL, VT),
4737                      DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4738   }
4739 
4740   // canonicalize constant to RHS.
4741   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4742       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4743     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4744 
4745   // fold (mulo x, 0) -> 0 + no carry out
4746   if (isNullOrNullSplat(N1))
4747     return CombineTo(N, DAG.getConstant(0, DL, VT),
4748                      DAG.getConstant(0, DL, CarryVT));
4749 
4750   // (mulo x, 2) -> (addo x, x)
4751   if (N1C && N1C->getAPIntValue() == 2)
4752     return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4753                        N->getVTList(), N0, N0);
4754 
4755   if (IsSigned) {
4756     // A 1 bit SMULO overflows if both inputs are 1.
4757     if (VT.getScalarSizeInBits() == 1) {
4758       SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4759       return CombineTo(N, And,
4760                        DAG.getSetCC(DL, CarryVT, And,
4761                                     DAG.getConstant(0, DL, VT), ISD::SETNE));
4762     }
4763 
4764     // Multiplying n * m significant bits yields a result of n + m significant
4765     // bits. If the total number of significant bits does not exceed the
4766     // result bit width (minus 1), there is no overflow.
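    // E.g. two i32 operands with at least 17 sign bits each cannot overflow.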
4767     unsigned SignBits = DAG.ComputeNumSignBits(N0);
4768     if (SignBits > 1)
4769       SignBits += DAG.ComputeNumSignBits(N1);
4770     if (SignBits > VT.getScalarSizeInBits() + 1)
4771       return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4772                        DAG.getConstant(0, DL, CarryVT));
4773   } else {
4774     KnownBits N1Known = DAG.computeKnownBits(N1);
4775     KnownBits N0Known = DAG.computeKnownBits(N0);
4776     bool Overflow;
4777     (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4778     if (!Overflow)
4779       return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4780                        DAG.getConstant(0, DL, CarryVT));
4781   }
4782 
4783   return SDValue();
4784 }
4785 
4786 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4787   SDValue N0 = N->getOperand(0);
4788   SDValue N1 = N->getOperand(1);
4789   EVT VT = N0.getValueType();
4790   unsigned Opcode = N->getOpcode();
4791 
4792   // fold vector ops
4793   if (VT.isVector())
4794     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4795       return FoldedVOp;
4796 
4797   // fold operation with constant operands.
4798   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4799     return C;
4800 
  // canonicalize constant to RHS.
4802   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4803       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4804     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4805 
  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4807   // Only do this if the current op isn't legal and the flipped is.
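  // E.g. (smin x, y) --> (umin x, y) when both sign bits are known zero.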
4808   if (!TLI.isOperationLegal(Opcode, VT) &&
4809       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4810       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4811     unsigned AltOpcode;
4812     switch (Opcode) {
4813     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4814     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4815     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4816     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4817     default: llvm_unreachable("Unknown MINMAX opcode");
4818     }
4819     if (TLI.isOperationLegal(AltOpcode, VT))
4820       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4821   }
4822 
4823   // Simplify the operands using demanded-bits information.
4824   if (SimplifyDemandedBits(SDValue(N, 0)))
4825     return SDValue(N, 0);
4826 
4827   return SDValue();
4828 }
4829 
4830 /// If this is a bitwise logic instruction and both operands have the same
4831 /// opcode, try to sink the other opcode after the logic instruction.
4832 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4833   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4834   EVT VT = N0.getValueType();
4835   unsigned LogicOpcode = N->getOpcode();
4836   unsigned HandOpcode = N0.getOpcode();
4837   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4838           LogicOpcode == ISD::XOR) && "Expected logic opcode");
4839   assert(HandOpcode == N1.getOpcode() && "Bad input!");
4840 
4841   // Bail early if none of these transforms apply.
4842   if (N0.getNumOperands() == 0)
4843     return SDValue();
4844 
4845   // FIXME: We should check number of uses of the operands to not increase
4846   //        the instruction count for all transforms.
4847 
4848   // Handle size-changing casts.
4849   SDValue X = N0.getOperand(0);
4850   SDValue Y = N1.getOperand(0);
4851   EVT XVT = X.getValueType();
4852   SDLoc DL(N);
4853   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4854       HandOpcode == ISD::SIGN_EXTEND) {
4855     // If both operands have other uses, this transform would create extra
4856     // instructions without eliminating anything.
4857     if (!N0.hasOneUse() && !N1.hasOneUse())
4858       return SDValue();
4859     // We need matching integer source types.
4860     if (XVT != Y.getValueType())
4861       return SDValue();
4862     // Don't create an illegal op during or after legalization. Don't ever
4863     // create an unsupported vector op.
4864     if ((VT.isVector() || LegalOperations) &&
4865         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4866       return SDValue();
4867     // Avoid infinite looping with PromoteIntBinOp.
4868     // TODO: Should we apply desirable/legal constraints to all opcodes?
4869     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4870         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4871       return SDValue();
4872     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4873     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4874     return DAG.getNode(HandOpcode, DL, VT, Logic);
4875   }
4876 
4877   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4878   if (HandOpcode == ISD::TRUNCATE) {
4879     // If both operands have other uses, this transform would create extra
4880     // instructions without eliminating anything.
4881     if (!N0.hasOneUse() && !N1.hasOneUse())
4882       return SDValue();
4883     // We need matching source types.
4884     if (XVT != Y.getValueType())
4885       return SDValue();
4886     // Don't create an illegal op during or after legalization.
4887     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4888       return SDValue();
4889     // Be extra careful sinking truncate. If it's free, there's no benefit in
4890     // widening a binop. Also, don't create a logic op on an illegal type.
4891     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4892       return SDValue();
4893     if (!TLI.isTypeLegal(XVT))
4894       return SDValue();
4895     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4896     return DAG.getNode(HandOpcode, DL, VT, Logic);
4897   }
4898 
4899   // For binops SHL/SRL/SRA/AND:
4900   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4901   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4902        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4903       N0.getOperand(1) == N1.getOperand(1)) {
4904     // If either operand has other uses, this transform is not an improvement.
4905     if (!N0.hasOneUse() || !N1.hasOneUse())
4906       return SDValue();
4907     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4908     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4909   }
4910 
4911   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4912   if (HandOpcode == ISD::BSWAP) {
4913     // If either operand has other uses, this transform is not an improvement.
4914     if (!N0.hasOneUse() || !N1.hasOneUse())
4915       return SDValue();
4916     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4917     return DAG.getNode(HandOpcode, DL, VT, Logic);
4918   }
4919 
4920   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4921   // Only perform this optimization up until type legalization, before
  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4923   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4924   // we don't want to undo this promotion.
4925   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4926   // on scalars.
4927   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4928        Level <= AfterLegalizeTypes) {
4929     // Input types must be integer and the same.
4930     if (XVT.isInteger() && XVT == Y.getValueType() &&
4931         !(VT.isVector() && TLI.isTypeLegal(VT) &&
4932           !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4933       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4934       return DAG.getNode(HandOpcode, DL, VT, Logic);
4935     }
4936   }
4937 
4938   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4939   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4940   // If both shuffles use the same mask, and both shuffle within a single
4941   // vector, then it is worthwhile to move the swizzle after the operation.
4942   // The type-legalizer generates this pattern when loading illegal
4943   // vector types from memory. In many cases this allows additional shuffle
4944   // optimizations.
4945   // There are other cases where moving the shuffle after the xor/and/or
4946   // is profitable even if shuffles don't perform a swizzle.
4947   // If both shuffles use the same mask, and both shuffles have the same first
4948   // or second operand, then it might still be profitable to move the shuffle
4949   // after the xor/and/or operation.
4950   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4951     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4952     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4953     assert(X.getValueType() == Y.getValueType() &&
4954            "Inputs to shuffles are not the same type");
4955 
4956     // Check that both shuffles use the same mask. The masks are known to be of
4957     // the same length because the result vector type is the same.
4958     // Check also that shuffles have only one use to avoid introducing extra
4959     // instructions.
4960     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4961         !SVN0->getMask().equals(SVN1->getMask()))
4962       return SDValue();
4963 
4964     // Don't try to fold this node if it requires introducing a
4965     // build vector of all zeros that might be illegal at this stage.
4966     SDValue ShOp = N0.getOperand(1);
4967     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4968       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4969 
4970     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4971     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4972       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4973                                   N0.getOperand(0), N1.getOperand(0));
4974       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4975     }
4976 
4977     // Don't try to fold this node if it requires introducing a
4978     // build vector of all zeros that might be illegal at this stage.
4979     ShOp = N0.getOperand(0);
4980     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4981       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4982 
4983     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4984     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4985       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4986                                   N1.getOperand(1));
4987       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4988     }
4989   }
4990 
4991   return SDValue();
4992 }
4993 
4994 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4995 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4996                                        const SDLoc &DL) {
4997   SDValue LL, LR, RL, RR, N0CC, N1CC;
4998   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4999       !isSetCCEquivalent(N1, RL, RR, N1CC))
5000     return SDValue();
5001 
5002   assert(N0.getValueType() == N1.getValueType() &&
5003          "Unexpected operand types for bitwise logic op");
5004   assert(LL.getValueType() == LR.getValueType() &&
5005          RL.getValueType() == RR.getValueType() &&
5006          "Unexpected operand types for setcc");
5007 
5008   // If we're here post-legalization or the logic op type is not i1, the logic
5009   // op type must match a setcc result type. Also, all folds require new
5010   // operations on the left and right operands, so those types must match.
5011   EVT VT = N0.getValueType();
5012   EVT OpVT = LL.getValueType();
5013   if (LegalOperations || VT.getScalarType() != MVT::i1)
5014     if (VT != getSetCCResultType(OpVT))
5015       return SDValue();
5016   if (OpVT != RL.getValueType())
5017     return SDValue();
5018 
5019   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5020   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5021   bool IsInteger = OpVT.isInteger();
5022   if (LR == RR && CC0 == CC1 && IsInteger) {
5023     bool IsZero = isNullOrNullSplat(LR);
5024     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5025 
5026     // All bits clear?
5027     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5028     // All sign bits clear?
5029     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5030     // Any bits set?
5031     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5032     // Any sign bits set?
5033     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5034 
5035     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
5036     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5037     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
5038     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
5039     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5040       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5041       AddToWorklist(Or.getNode());
5042       return DAG.getSetCC(DL, VT, Or, LR, CC1);
5043     }
5044 
5045     // All bits set?
5046     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5047     // All sign bits set?
5048     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5049     // Any bits clear?
5050     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5051     // Any sign bits clear?
5052     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5053 
5054     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5055     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
5056     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
5058     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5059       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5060       AddToWorklist(And.getNode());
5061       return DAG.getSetCC(DL, VT, And, LR, CC1);
5062     }
5063   }
5064 
5065   // TODO: What is the 'or' equivalent of this fold?
5066   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5067   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5068       IsInteger && CC0 == ISD::SETNE &&
5069       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5070        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5071     SDValue One = DAG.getConstant(1, DL, OpVT);
5072     SDValue Two = DAG.getConstant(2, DL, OpVT);
5073     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5074     AddToWorklist(Add.getNode());
5075     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5076   }
5077 
5078   // Try more general transforms if the predicates match and the only user of
5079   // the compares is the 'and' or 'or'.
5080   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5081       N0.hasOneUse() && N1.hasOneUse()) {
5082     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5083     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5084     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5085       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5086       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5087       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5088       SDValue Zero = DAG.getConstant(0, DL, OpVT);
5089       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5090     }
5091 
5092     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5093     // TODO - support non-uniform vector amounts.
5094     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5095       // Match a shared variable operand and 2 non-opaque constant operands.
5096       ConstantSDNode *C0 = isConstOrConstSplat(LR);
5097       ConstantSDNode *C1 = isConstOrConstSplat(RR);
5098       if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
5099         const APInt &CMax =
5100             APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5101         const APInt &CMin =
5102             APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5103         // The difference of the constants must be a single bit.
5104         if ((CMax - CMin).isPowerOf2()) {
          // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
          // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
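          // E.g. (and (setne X, 4), (setne X, 5)) -->
          //      (setne (and (sub X, 4), ~1), 0).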
5107           SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5108           SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5109           SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5110           SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5111           SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5112           SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5113           SDValue Zero = DAG.getConstant(0, DL, OpVT);
5114           return DAG.getSetCC(DL, VT, And, Zero, CC0);
5115         }
5116       }
5117     }
5118   }
5119 
5120   // Canonicalize equivalent operands to LL == RL.
5121   if (LL == RR && LR == RL) {
5122     CC1 = ISD::getSetCCSwappedOperands(CC1);
5123     std::swap(RL, RR);
5124   }
5125 
5126   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5127   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5128   if (LL == RL && LR == RR) {
5129     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5130                                 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5131     if (NewCC != ISD::SETCC_INVALID &&
5132         (!LegalOperations ||
5133          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5134           TLI.isOperationLegal(ISD::SETCC, OpVT))))
5135       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5136   }
5137 
5138   return SDValue();
5139 }
5140 
5141 /// This contains all DAGCombine rules which reduce two values combined by
5142 /// an And operation to a single value. This makes them reusable in the context
5143 /// of visitSELECT(). Rules involving constants are not included as
5144 /// visitSELECT() already handles those cases.
5145 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5146   EVT VT = N1.getValueType();
5147   SDLoc DL(N);
5148 
5149   // fold (and x, undef) -> 0
5150   if (N0.isUndef() || N1.isUndef())
5151     return DAG.getConstant(0, DL, VT);
5152 
5153   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5154     return V;
5155 
5156   // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
5157   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5158       VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
5159     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5160       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5161         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
5162         // immediate for an add, but it is legal if its top c2 bits are set,
5163         // transform the ADD so the immediate doesn't need to be materialized
5164         // in a register.
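        // E.g. with c2 = 48 on an i64, the top 48 bits of the AND are known
        // zero, so the top 48 bits of c1 may be set for free if that makes it
        // a legal add immediate.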
5165         APInt ADDC = ADDI->getAPIntValue();
5166         APInt SRLC = SRLI->getAPIntValue();
5167         if (ADDC.getMinSignedBits() <= 64 &&
5168             SRLC.ult(VT.getSizeInBits()) &&
5169             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5170           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5171                                              SRLC.getZExtValue());
5172           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5173             ADDC |= Mask;
5174             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5175               SDLoc DL0(N0);
5176               SDValue NewAdd =
5177                 DAG.getNode(ISD::ADD, DL0, VT,
5178                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5179               CombineTo(N0.getNode(), NewAdd);
5180               // Return N so it doesn't get rechecked!
5181               return SDValue(N, 0);
5182             }
5183           }
5184         }
5185       }
5186     }
5187   }
5188 
5189   // Reduce bit extract of low half of an integer to the narrower type.
  // (and (srl i64:x, K), KMask) ->
  //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
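  // E.g. (and (srl i64:x, 8), 255) -->
  //      (zext (and (srl (i32 (trunc x)), 8), 255)), subject to the
  // profitability checks below.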
5192   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5193     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5194       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5195         unsigned Size = VT.getSizeInBits();
5196         const APInt &AndMask = CAnd->getAPIntValue();
5197         unsigned ShiftBits = CShift->getZExtValue();
5198 
5199         // Bail out, this node will probably disappear anyway.
5200         if (ShiftBits == 0)
5201           return SDValue();
5202 
5203         unsigned MaskBits = AndMask.countTrailingOnes();
5204         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5205 
5206         if (AndMask.isMask() &&
5207             // Required bits must not span the two halves of the integer and
5208             // must fit in the half size type.
5209             (ShiftBits + MaskBits <= Size / 2) &&
5210             TLI.isNarrowingProfitable(VT, HalfVT) &&
5211             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5212             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5213             TLI.isTruncateFree(VT, HalfVT) &&
5214             TLI.isZExtFree(HalfVT, VT)) {
          // The isNarrowingProfitable check is to avoid regressions on PPC and
5216           // AArch64 which match a few 64-bit bit insert / bit extract patterns
5217           // on downstream users of this. Those patterns could probably be
5218           // extended to handle extensions mixed in.
5219 
          SDLoc SL(N0);
5221           assert(MaskBits <= Size);
5222 
5223           // Extracting the highest bit of the low half.
5224           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5225           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5226                                       N0.getOperand(0));
5227 
5228           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5229           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5230           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5231           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5232           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5233         }
5234       }
5235     }
5236   }
5237 
5238   return SDValue();
5239 }
5240 
5241 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5242                                    EVT LoadResultTy, EVT &ExtVT) {
5243   if (!AndC->getAPIntValue().isMask())
5244     return false;
5245 
5246   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5247 
5248   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5249   EVT LoadedVT = LoadN->getMemoryVT();
5250 
5251   if (ExtVT == LoadedVT &&
5252       (!LegalOperations ||
5253        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5254     // ZEXTLOAD will match without needing to change the size of the value being
5255     // loaded.
5256     return true;
5257   }
5258 
  // Do not change the width of a volatile or atomic load.
5260   if (!LoadN->isSimple())
5261     return false;
5262 
5263   // Do not generate loads of non-round integer types since these can
5264   // be expensive (and would be wrong if the type is not byte sized).
5265   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5266     return false;
5267 
5268   if (LegalOperations &&
5269       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5270     return false;
5271 
5272   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5273     return false;
5274 
5275   return true;
5276 }
5277 
5278 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5279                                     ISD::LoadExtType ExtType, EVT &MemVT,
5280                                     unsigned ShAmt) {
5281   if (!LDST)
5282     return false;
5283   // Only allow byte offsets.
5284   if (ShAmt % 8)
5285     return false;
5286 
5287   // Do not generate loads of non-round integer types since these can
5288   // be expensive (and would be wrong if the type is not byte sized).
5289   if (!MemVT.isRound())
5290     return false;
5291 
  // Don't change the width of a volatile or atomic load.
5293   if (!LDST->isSimple())
5294     return false;
5295 
5296   EVT LdStMemVT = LDST->getMemoryVT();
5297 
5298   // Bail out when changing the scalable property, since we can't be sure that
5299   // we're actually narrowing here.
5300   if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5301     return false;
5302 
5303   // Verify that we are actually reducing a load width here.
5304   if (LdStMemVT.bitsLT(MemVT))
5305     return false;
5306 
5307   // Ensure that this isn't going to produce an unsupported memory access.
5308   if (ShAmt) {
5309     assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5310     const unsigned ByteShAmt = ShAmt / 8;
5311     const Align LDSTAlign = LDST->getAlign();
5312     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
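    // E.g. narrowing a 4-byte-aligned access at byte offset 2 can only
    // assume 2-byte alignment for the new access.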
5313     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5314                                 LDST->getAddressSpace(), NarrowAlign,
5315                                 LDST->getMemOperand()->getFlags()))
5316       return false;
5317   }
5318 
5319   // It's not possible to generate a constant of extended or untyped type.
5320   EVT PtrType = LDST->getBasePtr().getValueType();
5321   if (PtrType == MVT::Untyped || PtrType.isExtended())
5322     return false;
5323 
5324   if (isa<LoadSDNode>(LDST)) {
5325     LoadSDNode *Load = cast<LoadSDNode>(LDST);
5326     // Don't transform one with multiple uses, this would require adding a new
5327     // load.
5328     if (!SDValue(Load, 0).hasOneUse())
5329       return false;
5330 
5331     if (LegalOperations &&
5332         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5333       return false;
5334 
5335     // For the transform to be legal, the load must produce only two values
5336     // (the value loaded and the chain).  Don't transform a pre-increment
5337     // load, for example, which produces an extra value.  Otherwise the
5338     // transformation is not equivalent, and the downstream logic to replace
5339     // uses gets things wrong.
5340     if (Load->getNumValues() > 2)
5341       return false;
5342 
5343     // If the load that we're shrinking is an extload and we're not just
5344     // discarding the extension we can't simply shrink the load. Bail.
5345     // TODO: It would be possible to merge the extensions in some cases.
5346     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5347         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5348       return false;
5349 
5350     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5351       return false;
5352   } else {
5353     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5354     StoreSDNode *Store = cast<StoreSDNode>(LDST);
5355     // Can't write outside the original store
5356     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5357       return false;
5358 
5359     if (LegalOperations &&
5360         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5361       return false;
5362   }
5363   return true;
5364 }
5365 
5366 bool DAGCombiner::SearchForAndLoads(SDNode *N,
5367                                     SmallVectorImpl<LoadSDNode*> &Loads,
5368                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5369                                     ConstantSDNode *Mask,
5370                                     SDNode *&NodeToMask) {
5371   // Recursively search for the operands, looking for loads which can be
5372   // narrowed.
5373   for (SDValue Op : N->op_values()) {
5374     if (Op.getValueType().isVector())
5375       return false;
5376 
5377     // Some constants may need fixing up later if they are too large.
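    // E.g. under a 0xff mask, (or x, 0x100) has constant bits outside the
    // mask, so the node is recorded to have its constant narrowed later.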
5378     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5379       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5380           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5381         NodesWithConsts.insert(N);
5382       continue;
5383     }
5384 
5385     if (!Op.hasOneUse())
5386       return false;
5387 
5388     switch(Op.getOpcode()) {
5389     case ISD::LOAD: {
5390       auto *Load = cast<LoadSDNode>(Op);
5391       EVT ExtVT;
5392       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5393           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5394 
5395         // ZEXTLOAD is already small enough.
5396         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5397             ExtVT.bitsGE(Load->getMemoryVT()))
5398           continue;
5399 
5400         // Use LE to convert equal sized loads to zext.
5401         if (ExtVT.bitsLE(Load->getMemoryVT()))
5402           Loads.push_back(Load);
5403 
5404         continue;
5405       }
5406       return false;
5407     }
5408     case ISD::ZERO_EXTEND:
5409     case ISD::AssertZext: {
5410       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5411       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5412       EVT VT = Op.getOpcode() == ISD::AssertZext ?
5413         cast<VTSDNode>(Op.getOperand(1))->getVT() :
5414         Op.getOperand(0).getValueType();
5415 
5416       // We can accept extending nodes if the mask is wider or an equal
5417       // width to the original type.
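      // E.g. a mask of 0xffff fully covers a value extended from i16, so the
      // extension can be looked through.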
5418       if (ExtVT.bitsGE(VT))
5419         continue;
5420       break;
5421     }
5422     case ISD::OR:
5423     case ISD::XOR:
5424     case ISD::AND:
5425       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5426                              NodeToMask))
5427         return false;
5428       continue;
5429     }
5430 
    // Allow one node which will be masked along with any loads found.
5432     if (NodeToMask)
5433       return false;
5434 
5435     // Also ensure that the node to be masked only produces one data result.
5436     NodeToMask = Op.getNode();
5437     if (NodeToMask->getNumValues() > 1) {
5438       bool HasValue = false;
5439       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5440         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5441         if (VT != MVT::Glue && VT != MVT::Other) {
5442           if (HasValue) {
5443             NodeToMask = nullptr;
5444             return false;
5445           }
5446           HasValue = true;
5447         }
5448       }
5449       assert(HasValue && "Node to be masked has no data result?");
5450     }
5451   }
5452   return true;
5453 }
5454 
5455 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5456   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5457   if (!Mask)
5458     return false;
5459 
5460   if (!Mask->getAPIntValue().isMask())
5461     return false;
5462 
5463   // No need to do anything if the and directly uses a load.
5464   if (isa<LoadSDNode>(N->getOperand(0)))
5465     return false;
5466 
5467   SmallVector<LoadSDNode*, 8> Loads;
5468   SmallPtrSet<SDNode*, 2> NodesWithConsts;
5469   SDNode *FixupNode = nullptr;
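  // Illustrative example: for (and (or (load x), (load y)), 0xff), the mask
  // can be propagated onto both loads, turning them into i8 zextloads and
  // making this AND redundant.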
5470   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    if (Loads.empty())
5472       return false;
5473 
5474     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5475     SDValue MaskOp = N->getOperand(1);
5476 
5477     // If it exists, fixup the single node we allow in the tree that needs
5478     // masking.
5479     if (FixupNode) {
5480       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5481       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5482                                 FixupNode->getValueType(0),
5483                                 SDValue(FixupNode, 0), MaskOp);
5484       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      if (And.getOpcode() == ISD::AND)
5486         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5487     }
5488 
5489     // Narrow any constants that need it.
5490     for (auto *LogicN : NodesWithConsts) {
5491       SDValue Op0 = LogicN->getOperand(0);
5492       SDValue Op1 = LogicN->getOperand(1);
5493 
      if (isa<ConstantSDNode>(Op0))
        std::swap(Op0, Op1);
5496 
5497       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5498                                 Op1, MaskOp);
5499 
5500       DAG.UpdateNodeOperands(LogicN, Op0, And);
5501     }
5502 
5503     // Create narrow loads.
5504     for (auto *Load : Loads) {
5505       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5506       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5507                                 SDValue(Load, 0), MaskOp);
5508       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      if (And.getOpcode() == ISD::AND)
5510         And = SDValue(
5511             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5512       SDValue NewLoad = ReduceLoadWidth(And.getNode());
5513       assert(NewLoad &&
5514              "Shouldn't be masking the load if it can't be narrowed");
5515       CombineTo(Load, NewLoad, NewLoad.getValue(1));
5516     }
5517     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5518     return true;
5519   }
5520   return false;
5521 }
5522 
5523 // Unfold
5524 //    x &  (-1 'logical shift' y)
5525 // To
5526 //    (x 'opposite logical shift' y) 'logical shift' y
5527 // if it is better for performance.
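// E.g. (and x, (shl -1, y)) clears the low y bits and can be rewritten as
// (shl (srl x, y), y) when shifts are cheaper than materializing the mask.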
5528 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5529   assert(N->getOpcode() == ISD::AND);
5530 
5531   SDValue N0 = N->getOperand(0);
5532   SDValue N1 = N->getOperand(1);
5533 
5534   // Do we actually prefer shifts over mask?
5535   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5536     return SDValue();
5537 
5538   // Try to match  (-1 '[outer] logical shift' y)
5539   unsigned OuterShift;
5540   unsigned InnerShift; // The opposite direction to the OuterShift.
5541   SDValue Y;           // Shift amount.
5542   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5543     if (!M.hasOneUse())
5544       return false;
5545     OuterShift = M->getOpcode();
5546     if (OuterShift == ISD::SHL)
5547       InnerShift = ISD::SRL;
5548     else if (OuterShift == ISD::SRL)
5549       InnerShift = ISD::SHL;
5550     else
5551       return false;
5552     if (!isAllOnesConstant(M->getOperand(0)))
5553       return false;
5554     Y = M->getOperand(1);
5555     return true;
5556   };
5557 
5558   SDValue X;
5559   if (matchMask(N1))
5560     X = N0;
5561   else if (matchMask(N0))
5562     X = N1;
5563   else
5564     return SDValue();
5565 
5566   SDLoc DL(N);
5567   EVT VT = N->getValueType(0);
5568 
5569   //     tmp = x   'opposite logical shift' y
5570   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5571   //     ret = tmp 'logical shift' y
5572   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5573 
5574   return T1;
5575 }
5576 
5577 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5578 /// For a target with a bit test, this is expected to become test + set and save
5579 /// at least 1 instruction.
5580 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5581   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5582 
5583   // This is probably not worthwhile without a supported type.
5584   EVT VT = And->getValueType(0);
5585   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5586   if (!TLI.isTypeLegal(VT))
5587     return SDValue();
5588 
5589   // Look through an optional extension and find a 'not'.
5590   // TODO: Should we favor test+set even without the 'not' op?
5591   SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5592   if (Not.getOpcode() == ISD::ANY_EXTEND)
5593     Not = Not.getOperand(0);
5594   if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5595     return SDValue();
5596 
  // Look through an optional truncation. The source operand may not be the same
5598   // type as the original 'and', but that is ok because we are masking off
5599   // everything but the low bit.
5600   SDValue Srl = Not.getOperand(0);
5601   if (Srl.getOpcode() == ISD::TRUNCATE)
5602     Srl = Srl.getOperand(0);
5603 
5604   // Match a shift-right by constant.
5605   if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5606       !isa<ConstantSDNode>(Srl.getOperand(1)))
5607     return SDValue();
5608 
5609   // We might have looked through casts that make this transform invalid.
5610   // TODO: If the source type is wider than the result type, do the mask and
5611   //       compare in the source type.
5612   const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5613   unsigned VTBitWidth = VT.getSizeInBits();
5614   if (ShiftAmt.uge(VTBitWidth))
5615     return SDValue();
5616 
5617   // Turn this into a bit-test pattern using mask op + setcc:
5618   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5619   SDLoc DL(And);
5620   SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5621   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5622   SDValue Mask = DAG.getConstant(
5623       APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5624   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5625   SDValue Zero = DAG.getConstant(0, DL, VT);
5626   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5627   return DAG.getZExtOrTrunc(Setcc, DL, VT);
5628 }
5629 
5630 SDValue DAGCombiner::visitAND(SDNode *N) {
5631   SDValue N0 = N->getOperand(0);
5632   SDValue N1 = N->getOperand(1);
5633   EVT VT = N1.getValueType();
5634 
5635   // x & x --> x
5636   if (N0 == N1)
5637     return N0;
5638 
5639   // fold vector ops
5640   if (VT.isVector()) {
5641     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5642       return FoldedVOp;
5643 
5644     // fold (and x, 0) -> 0, vector edition
5645     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
5646       // do not return N0, because undef node may exist in N0
5647       return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()),
5648                              SDLoc(N), N0.getValueType());
5649     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5650       // do not return N1, because undef node may exist in N1
5651       return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
5652                              SDLoc(N), N1.getValueType());
5653 
5654     // fold (and x, -1) -> x, vector edition
5655     if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
5656       return N1;
5657     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
5658       return N0;
5659 
5660     // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
5661     auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
5662     auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
5663     if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
5664         N0.hasOneUse() && N1.hasOneUse()) {
5665       EVT LoadVT = MLoad->getMemoryVT();
5666       EVT ExtVT = VT;
5667       if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
        // For this AND to be a zero extension of the masked load, the
        // elements of the BuildVec must mask the bottom bits of the extended
        // element type.
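        // E.g. an i8 -> i32 extending masked load ANDed with a splat of 0xff
        // is exactly an i8 -> i32 zero-extending masked load.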
5671         if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
5672           uint64_t ElementSize =
5673               LoadVT.getVectorElementType().getScalarSizeInBits();
5674           if (Splat->getAPIntValue().isMask(ElementSize)) {
5675             return DAG.getMaskedLoad(
5676                 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
5677                 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
5678                 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
5679                 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
5680           }
5681         }
5682       }
5683     }
5684   }
5685 
5686   // fold (and c1, c2) -> c1&c2
5687   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5688   if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5689     return C;
5690 
5691   // canonicalize constant to RHS
5692   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5693       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5694     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5695 
5696   // fold (and x, -1) -> x
5697   if (isAllOnesConstant(N1))
5698     return N0;
5699 
5700   // if (and x, c) is known to be zero, return 0
5701   unsigned BitWidth = VT.getScalarSizeInBits();
5702   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
5703     return DAG.getConstant(0, SDLoc(N), VT);
5704 
5705   if (SDValue NewSel = foldBinOpIntoSelect(N))
5706     return NewSel;
5707 
5708   // reassociate and
5709   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5710     return RAND;
5711 
5712   // Try to convert a constant mask AND into a shuffle clear mask.
5713   if (VT.isVector())
5714     if (SDValue Shuffle = XformToShuffleWithZero(N))
5715       return Shuffle;
5716 
5717   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5718     return Combined;
5719 
5720   // fold (and (or x, C), D) -> D if (C & D) == D
5721   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5722     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5723   };
5724   if (N0.getOpcode() == ISD::OR &&
5725       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5726     return N1;
5727   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5728   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5729     SDValue N0Op0 = N0.getOperand(0);
5730     APInt Mask = ~N1C->getAPIntValue();
5731     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5732     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5733       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5734                                  N0.getValueType(), N0Op0);
5735 
5736       // Replace uses of the AND with uses of the Zero extend node.
5737       CombineTo(N, Zext);
5738 
5739       // We actually want to replace all uses of the any_extend with the
5740       // zero_extend, to avoid duplicating things.  This will later cause this
5741       // AND to be folded.
5742       CombineTo(N0.getNode(), Zext);
5743       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5744     }
5745   }
5746 
5747   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5748   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5749   // already be zero by virtue of the width of the base type of the load.
5750   //
5751   // the 'X' node here can either be nothing or an extract_vector_elt to catch
5752   // more cases.
5753   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5754        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5755        N0.getOperand(0).getOpcode() == ISD::LOAD &&
5756        N0.getOperand(0).getResNo() == 0) ||
5757       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5758     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5759                                          N0 : N0.getOperand(0) );
5760 
5761     // Get the constant (if applicable) the zero'th operand is being ANDed with.
5762     // This can be a pure constant or a vector splat, in which case we treat the
5763     // vector as a scalar and use the splat value.
5764     APInt Constant = APInt::getZero(1);
5765     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5766       Constant = C->getAPIntValue();
5767     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5768       APInt SplatValue, SplatUndef;
5769       unsigned SplatBitSize;
5770       bool HasAnyUndefs;
5771       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5772                                              SplatBitSize, HasAnyUndefs);
5773       if (IsSplat) {
5774         // Undef bits can contribute to a possible optimisation if set, so
5775         // set them.
5776         SplatValue |= SplatUndef;
5777 
5778         // The splat value may be something like "0x00FFFFFF", which means 0 for
5779         // the first vector value and FF for the rest, repeating. We need a mask
5780         // that will apply equally to all members of the vector, so AND all the
5781         // lanes of the constant together.
5782         unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5783 
5784         // If the splat value has been compressed to a bitlength lower
5785         // than the size of the vector lane, we need to re-expand it to
5786         // the lane size.
5787         if (EltBitWidth > SplatBitSize)
5788           for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5789                SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5790             SplatValue |= SplatValue.shl(SplatBitSize);
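        // E.g. an 8-bit splat value of 0xAB in a 32-bit lane is re-expanded
        // to 0xABABABAB here.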
5791 
        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
5794         if ((SplatBitSize % EltBitWidth) == 0) {
5795           Constant = APInt::getAllOnes(EltBitWidth);
5796           for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5797             Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5798         }
5799       }
5800     }
5801 
5802     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5803     // actually legal and isn't going to get expanded, else this is a false
5804     // optimisation.
5805     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5806                                                     Load->getValueType(0),
5807                                                     Load->getMemoryVT());
5808 
5809     // Resize the constant to the same size as the original memory access before
5810     // extension. If it is still the AllOnesValue then this AND is completely
5811     // unneeded.
5812     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5813 
5814     bool B;
5815     switch (Load->getExtensionType()) {
5816     default: B = false; break;
5817     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5818     case ISD::ZEXTLOAD:
5819     case ISD::NON_EXTLOAD: B = true; break;
5820     }
5821 
5822     if (B && Constant.isAllOnes()) {
5823       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5824       // preserve semantics once we get rid of the AND.
5825       SDValue NewLoad(Load, 0);
5826 
5827       // Fold the AND away. NewLoad may get replaced immediately.
5828       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5829 
5830       if (Load->getExtensionType() == ISD::EXTLOAD) {
5831         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5832                               Load->getValueType(0), SDLoc(Load),
5833                               Load->getChain(), Load->getBasePtr(),
5834                               Load->getOffset(), Load->getMemoryVT(),
5835                               Load->getMemOperand());
5836         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5837         if (Load->getNumValues() == 3) {
5838           // PRE/POST_INC loads have 3 values.
5839           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5840                            NewLoad.getValue(2) };
5841           CombineTo(Load, To, 3, true);
5842         } else {
5843           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5844         }
5845       }
5846 
5847       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5848     }
5849   }
5850 
5851   // fold (and (masked_gather x)) -> (zext_masked_gather x)
5852   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
5853     EVT MemVT = GN0->getMemoryVT();
5854     EVT ScalarVT = MemVT.getScalarType();
5855 
5856     if (SDValue(GN0, 0).hasOneUse() &&
5857         isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
        TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
5859       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
5860                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
5861 
5862       SDValue ZExtLoad = DAG.getMaskedGather(
5863           DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
5864           GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
5865 
5866       CombineTo(N, ZExtLoad);
5867       AddToWorklist(ZExtLoad.getNode());
5868       // Avoid recheck of N.
5869       return SDValue(N, 0);
5870     }
5871   }
5872 
5873   // fold (and (load x), 255) -> (zextload x, i8)
5874   // fold (and (extload x, i16), 255) -> (zextload x, i8)
5875   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5876   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5877                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
5878                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5879     if (SDValue Res = ReduceLoadWidth(N)) {
5880       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5881         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5882       AddToWorklist(N);
5883       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5884       return SDValue(N, 0);
5885     }
5886   }
5887 
5888   if (LegalTypes) {
5889     // Attempt to propagate the AND back up to the leaves which, if they're
5890     // loads, can be combined to narrow loads and the AND node can be removed.
5891     // Perform after legalization so that extend nodes will already be
5892     // combined into the loads.
5893     if (BackwardsPropagateMask(N))
5894       return SDValue(N, 0);
5895   }
5896 
5897   if (SDValue Combined = visitANDLike(N0, N1, N))
5898     return Combined;
5899 
5900   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
5901   if (N0.getOpcode() == N1.getOpcode())
5902     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5903       return V;
5904 
5905   // Masking the negated extension of a boolean is just the zero-extended
5906   // boolean:
5907   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5908   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5909   //
5910   // Note: the SimplifyDemandedBits fold below can make an information-losing
5911   // transform, and then we have no way to find this better fold.
5912   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5913     if (isNullOrNullSplat(N0.getOperand(0))) {
5914       SDValue SubRHS = N0.getOperand(1);
5915       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5916           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5917         return SubRHS;
5918       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5919           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5920         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5921     }
5922   }
5923 
5924   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5925   // fold (and (sra)) -> (and (srl)) when possible.
5926   if (SimplifyDemandedBits(SDValue(N, 0)))
5927     return SDValue(N, 0);
5928 
5929   // fold (zext_inreg (extload x)) -> (zextload x)
5930   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5931   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5932       (ISD::isEXTLoad(N0.getNode()) ||
5933        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5934     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5935     EVT MemVT = LN0->getMemoryVT();
5936     // If we zero all the possible extended bits, then we can turn this into
5937     // a zextload if we are running before legalize or the operation is legal.
5938     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5939     unsigned MemBitSize = MemVT.getScalarSizeInBits();
5940     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5941     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5942         ((!LegalOperations && LN0->isSimple()) ||
5943          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5944       SDValue ExtLoad =
5945           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5946                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5947       AddToWorklist(N);
5948       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5949       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5950     }
5951   }
5952 
5953   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5954   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5955     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5956                                            N0.getOperand(1), false))
5957       return BSwap;
5958   }
5959 
5960   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5961     return Shifts;
5962 
5963   if (TLI.hasBitTest(N0, N1))
5964     if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5965       return V;
5966 
5967   // Recognize the following pattern:
5968   //
5969   // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
5970   //
5971   // where bitmask is a mask that clears the upper bits of AndVT. The
5972   // number of bits in bitmask must be a power of two.
5973   auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
5974     if (LHS->getOpcode() != ISD::SIGN_EXTEND)
5975       return false;
5976 
5977     auto *C = dyn_cast<ConstantSDNode>(RHS);
5978     if (!C)
5979       return false;
5980 
5981     if (!C->getAPIntValue().isMask(
5982             LHS.getOperand(0).getValueType().getFixedSizeInBits()))
5983       return false;
5984 
5985     return true;
5986   };
5987 
5988   // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
5989   if (IsAndZeroExtMask(N0, N1))
5990     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
5991 
5992   return SDValue();
5993 }
5994 
5995 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
5996 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5997                                         bool DemandHighBits) {
5998   if (!LegalOperations)
5999     return SDValue();
6000 
6001   EVT VT = N->getValueType(0);
6002   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
6003     return SDValue();
6004   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6005     return SDValue();
6006 
6007   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
6008   bool LookPassAnd0 = false;
6009   bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
6014   if (N0.getOpcode() == ISD::AND) {
6015     if (!N0.getNode()->hasOneUse())
6016       return SDValue();
6017     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6018     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6019     // This is needed for X86.
6020     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6021                   N01C->getZExtValue() != 0xFFFF))
6022       return SDValue();
6023     N0 = N0.getOperand(0);
6024     LookPassAnd0 = true;
6025   }
6026 
6027   if (N1.getOpcode() == ISD::AND) {
6028     if (!N1.getNode()->hasOneUse())
6029       return SDValue();
6030     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6031     if (!N11C || N11C->getZExtValue() != 0xFF)
6032       return SDValue();
6033     N1 = N1.getOperand(0);
6034     LookPassAnd1 = true;
6035   }
6036 
6037   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6038     std::swap(N0, N1);
6039   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6040     return SDValue();
6041   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
6042     return SDValue();
6043 
6044   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6045   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6046   if (!N01C || !N11C)
6047     return SDValue();
6048   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6049     return SDValue();
6050 
6051   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6052   SDValue N00 = N0->getOperand(0);
6053   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6054     if (!N00.getNode()->hasOneUse())
6055       return SDValue();
6056     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6057     if (!N001C || N001C->getZExtValue() != 0xFF)
6058       return SDValue();
6059     N00 = N00.getOperand(0);
6060     LookPassAnd0 = true;
6061   }
6062 
6063   SDValue N10 = N1->getOperand(0);
6064   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6065     if (!N10.getNode()->hasOneUse())
6066       return SDValue();
6067     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6068     // Also allow 0xFFFF since the bits will be shifted out. This is needed
6069     // for X86.
6070     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6071                    N101C->getZExtValue() != 0xFFFF))
6072       return SDValue();
6073     N10 = N10.getOperand(0);
6074     LookPassAnd1 = true;
6075   }
6076 
6077   if (N00 != N10)
6078     return SDValue();
6079 
6080   // Make sure everything beyond the low halfword gets set to zero since the SRL
6081   // 16 will clear the top bits.
6082   unsigned OpSizeInBits = VT.getSizeInBits();
6083   if (DemandHighBits && OpSizeInBits > 16) {
6084     // If the left-shift isn't masked out then the only way this is a bswap is
6085     // if all bits beyond the low 8 are 0. In that case the entire pattern
6086     // reduces to a left shift anyway: leave it for other parts of the combiner.
6087     if (!LookPassAnd0)
6088       return SDValue();
6089 
6090     // However, if the right shift isn't masked out then it might be because
6091     // it's not needed. See if we can spot that too.
6092     if (!LookPassAnd1 &&
6093         !DAG.MaskedValueIsZero(
6094             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
6095       return SDValue();
6096   }
6097 
6098   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6099   if (OpSizeInBits > 16) {
6100     SDLoc DL(N);
6101     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6102                       DAG.getConstant(OpSizeInBits - 16, DL,
6103                                       getShiftAmountTy(VT)));
6104   }
6105   return Res;
6106 }
6107 
6108 /// Return true if the specified node is an element that makes up a 32-bit
6109 /// packed halfword byteswap.
6110 /// ((x & 0x000000ff) << 8) |
6111 /// ((x & 0x0000ff00) >> 8) |
6112 /// ((x & 0x00ff0000) << 8) |
6113 /// ((x & 0xff000000) >> 8)
6114 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6115   if (!N.getNode()->hasOneUse())
6116     return false;
6117 
6118   unsigned Opc = N.getOpcode();
6119   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6120     return false;
6121 
6122   SDValue N0 = N.getOperand(0);
6123   unsigned Opc0 = N0.getOpcode();
6124   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6125     return false;
6126 
6127   ConstantSDNode *N1C = nullptr;
6128   // SHL or SRL: look upstream for AND mask operand
6129   if (Opc == ISD::AND)
6130     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6131   else if (Opc0 == ISD::AND)
6132     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6133   if (!N1C)
6134     return false;
6135 
6136   unsigned MaskByteOffset;
6137   switch (N1C->getZExtValue()) {
6138   default:
6139     return false;
6140   case 0xFF:       MaskByteOffset = 0; break;
6141   case 0xFF00:     MaskByteOffset = 1; break;
6142   case 0xFFFF:
6143     // In case demanded bits didn't clear the bits that will be shifted out.
6144     // This is needed for X86.
6145     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6146       MaskByteOffset = 1;
6147       break;
6148     }
6149     return false;
6150   case 0xFF0000:   MaskByteOffset = 2; break;
6151   case 0xFF000000: MaskByteOffset = 3; break;
6152   }
6153 
6154   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6155   if (Opc == ISD::AND) {
6156     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6157       // (x >> 8) & 0xff
6158       // (x >> 8) & 0xff0000
6159       if (Opc0 != ISD::SRL)
6160         return false;
6161       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6162       if (!C || C->getZExtValue() != 8)
6163         return false;
6164     } else {
6165       // (x << 8) & 0xff00
6166       // (x << 8) & 0xff000000
6167       if (Opc0 != ISD::SHL)
6168         return false;
6169       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6170       if (!C || C->getZExtValue() != 8)
6171         return false;
6172     }
6173   } else if (Opc == ISD::SHL) {
6174     // (x & 0xff) << 8
6175     // (x & 0xff0000) << 8
6176     if (MaskByteOffset != 0 && MaskByteOffset != 2)
6177       return false;
6178     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6179     if (!C || C->getZExtValue() != 8)
6180       return false;
6181   } else { // Opc == ISD::SRL
6182     // (x & 0xff00) >> 8
6183     // (x & 0xff000000) >> 8
6184     if (MaskByteOffset != 1 && MaskByteOffset != 3)
6185       return false;
6186     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6187     if (!C || C->getZExtValue() != 8)
6188       return false;
6189   }
6190 
6191   if (Parts[MaskByteOffset])
6192     return false;
6193 
6194   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6195   return true;
6196 }
6197 
6198 // Match 2 elements of a packed halfword bswap.
6199 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6200   if (N.getOpcode() == ISD::OR)
6201     return isBSwapHWordElement(N.getOperand(0), Parts) &&
6202            isBSwapHWordElement(N.getOperand(1), Parts);
6203 
6204   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6205     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6206     if (!C || C->getAPIntValue() != 16)
6207       return false;
6208     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6209     return true;
6210   }
6211 
6212   return false;
6213 }
6214 
6215 // Match this pattern:
6216 //   (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
6217 // And rewrite this to:
6218 //   (rotr (bswap A), 16)
6219 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6220                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
6221                                        SDValue N1, EVT VT, EVT ShiftAmountTy) {
6222   assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6223          "MatchBSwapHWordOrAndAnd: expecting i32");
6224   if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6225     return SDValue();
6226   if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6227     return SDValue();
6228   // TODO: this is too restrictive; lifting this restriction requires more tests
6229   if (!N0->hasOneUse() || !N1->hasOneUse())
6230     return SDValue();
6231   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6232   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6233   if (!Mask0 || !Mask1)
6234     return SDValue();
6235   if (Mask0->getAPIntValue() != 0xff00ff00 ||
6236       Mask1->getAPIntValue() != 0x00ff00ff)
6237     return SDValue();
6238   SDValue Shift0 = N0.getOperand(0);
6239   SDValue Shift1 = N1.getOperand(0);
6240   if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6241     return SDValue();
6242   ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6243   ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6244   if (!ShiftAmt0 || !ShiftAmt1)
6245     return SDValue();
6246   if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6247     return SDValue();
6248   if (Shift0.getOperand(0) != Shift1.getOperand(0))
6249     return SDValue();
6250 
6251   SDLoc DL(N);
6252   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6253   SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6254   return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6255 }
6256 
6257 /// Match a 32-bit packed halfword bswap. That is
6258 /// ((x & 0x000000ff) << 8) |
6259 /// ((x & 0x0000ff00) >> 8) |
6260 /// ((x & 0x00ff0000) << 8) |
6261 /// ((x & 0xff000000) >> 8)
6262 /// => (rotl (bswap x), 16)
6263 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6264   if (!LegalOperations)
6265     return SDValue();
6266 
6267   EVT VT = N->getValueType(0);
6268   if (VT != MVT::i32)
6269     return SDValue();
6270   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6271     return SDValue();
6272 
  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
                                              getShiftAmountTy(VT)))
    return BSwap;
6276 
6277   // Try again with commuted operands.
  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
                                              getShiftAmountTy(VT)))
    return BSwap;

6283   // Look for either
6284   // (or (bswaphpair), (bswaphpair))
6285   // (or (or (bswaphpair), (and)), (and))
6286   // (or (or (and), (bswaphpair)), (and))
6287   SDNode *Parts[4] = {};
6288 
6289   if (isBSwapHWordPair(N0, Parts)) {
6290     // (or (or (and), (and)), (or (and), (and)))
6291     if (!isBSwapHWordPair(N1, Parts))
6292       return SDValue();
6293   } else if (N0.getOpcode() == ISD::OR) {
6294     // (or (or (or (and), (and)), (and)), (and))
6295     if (!isBSwapHWordElement(N1, Parts))
6296       return SDValue();
6297     SDValue N00 = N0.getOperand(0);
6298     SDValue N01 = N0.getOperand(1);
6299     if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6300         !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6301       return SDValue();
6302   } else
6303     return SDValue();
6304 
6305   // Make sure the parts are all coming from the same node.
6306   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6307     return SDValue();
6308 
6309   SDLoc DL(N);
6310   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6311                               SDValue(Parts[0], 0));
6312 
6313   // Result of the bswap should be rotated by 16. If it's not legal, then
6314   // do  (x << 16) | (x >> 16).
6315   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6316   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6317     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6318   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6319     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6320   return DAG.getNode(ISD::OR, DL, VT,
6321                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6322                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6323 }
6324 
6325 /// This contains all DAGCombine rules which reduce two values combined by
6326 /// an Or operation to a single value \see visitANDLike().
6327 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6328   EVT VT = N1.getValueType();
6329   SDLoc DL(N);
6330 
6331   // fold (or x, undef) -> -1
6332   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6333     return DAG.getAllOnesConstant(DL, VT);
6334 
6335   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6336     return V;
6337 
6338   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
6339   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6340       // Don't increase # computations.
6341       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6342     // We can only do this xform if we know that bits from X that are set in C2
6343     // but not in C1 are already zero.  Likewise for Y.
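    // E.g. (or (and X, 0xff00), (and Y, 0x00ff)) -> (and (or X, Y), 0xffff)
    // when X's bits in 0x00ff and Y's bits in 0xff00 are known to be zero.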
6344     if (const ConstantSDNode *N0O1C =
6345         getAsNonOpaqueConstant(N0.getOperand(1))) {
6346       if (const ConstantSDNode *N1O1C =
6347           getAsNonOpaqueConstant(N1.getOperand(1))) {
6348         // We can only do this xform if we know that bits from X that are set in
6349         // C2 but not in C1 are already zero.  Likewise for Y.
6350         const APInt &LHSMask = N0O1C->getAPIntValue();
6351         const APInt &RHSMask = N1O1C->getAPIntValue();
6352 
6353         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6354             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6355           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6356                                   N0.getOperand(0), N1.getOperand(0));
6357           return DAG.getNode(ISD::AND, DL, VT, X,
6358                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
6359         }
6360       }
6361     }
6362   }
6363 
6364   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6365   if (N0.getOpcode() == ISD::AND &&
6366       N1.getOpcode() == ISD::AND &&
6367       N0.getOperand(0) == N1.getOperand(0) &&
6368       // Don't increase # computations.
6369       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6370     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6371                             N0.getOperand(1), N1.getOperand(1));
6372     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6373   }
6374 
6375   return SDValue();
6376 }
6377 
6378 /// OR combines for which the commuted variant will be tried as well.
6379 static SDValue visitORCommutative(
6380     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6381   EVT VT = N0.getValueType();
6382   if (N0.getOpcode() == ISD::AND) {
6383     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6384     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6385       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6386 
6387     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6388     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6389       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6390   }
6391 
6392   return SDValue();
6393 }
6394 
6395 SDValue DAGCombiner::visitOR(SDNode *N) {
6396   SDValue N0 = N->getOperand(0);
6397   SDValue N1 = N->getOperand(1);
6398   EVT VT = N1.getValueType();
6399 
6400   // x | x --> x
6401   if (N0 == N1)
6402     return N0;
6403 
6404   // fold vector ops
6405   if (VT.isVector()) {
6406     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6407       return FoldedVOp;
6408 
6409     // fold (or x, 0) -> x, vector edition
6410     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
6411       return N1;
6412     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6413       return N0;
6414 
6415     // fold (or x, -1) -> -1, vector edition
6416     if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
6417       // do not return N0, because undef node may exist in N0
6418       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
6419     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6420       // do not return N1, because undef node may exist in N1
6421       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6422 
6423     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6424     // Do this only if the resulting shuffle is legal.
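    // E.g. with zero vector V_0 and 4 elements:
    //   (or (shuf A, V_0, <0,4,2,4>), (shuf B, V_0, <4,1,4,3>))
    //     -> (shuf A, B, <0,5,2,7>)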
6425     if (isa<ShuffleVectorSDNode>(N0) &&
6426         isa<ShuffleVectorSDNode>(N1) &&
6427         // Avoid folding a node with illegal type.
6428         TLI.isTypeLegal(VT)) {
6429       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6430       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6431       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6432       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6433       // Ensure both shuffles have a zero input.
6434       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6435         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6436         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6437         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6438         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6439         bool CanFold = true;
6440         int NumElts = VT.getVectorNumElements();
6441         SmallVector<int, 4> Mask(NumElts);
6442 
6443         for (int i = 0; i != NumElts; ++i) {
6444           int M0 = SV0->getMaskElt(i);
6445           int M1 = SV1->getMaskElt(i);
6446 
6447           // Determine if either index is pointing to a zero vector.
6448           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6449           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6450 
          // If one element is zero and the other side is undef, keep undef.
6452           // This also handles the case that both are undef.
6453           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6454             Mask[i] = -1;
6455             continue;
6456           }
6457 
6458           // Make sure only one of the elements is zero.
6459           if (M0Zero == M1Zero) {
6460             CanFold = false;
6461             break;
6462           }
6463 
6464           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6465 
6466           // We have a zero and non-zero element. If the non-zero came from
6467           // SV0 make the index a LHS index. If it came from SV1, make it
6468           // a RHS index. We need to mod by NumElts because we don't care
6469           // which operand it came from in the original shuffles.
6470           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6471         }
6472 
6473         if (CanFold) {
6474           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6475           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6476 
6477           SDValue LegalShuffle =
6478               TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6479                                           Mask, DAG);
6480           if (LegalShuffle)
6481             return LegalShuffle;
6482         }
6483       }
6484     }
6485   }
6486 
6487   // fold (or c1, c2) -> c1|c2
6488   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6489   if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6490     return C;
6491 
6492   // canonicalize constant to RHS
6493   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6494      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6495     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6496 
6497   // fold (or x, 0) -> x
6498   if (isNullConstant(N1))
6499     return N0;
6500 
6501   // fold (or x, -1) -> -1
6502   if (isAllOnesConstant(N1))
6503     return N1;
6504 
6505   if (SDValue NewSel = foldBinOpIntoSelect(N))
6506     return NewSel;
6507 
6508   // fold (or x, c) -> c iff (x & ~c) == 0
6509   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6510     return N1;
6511 
6512   if (SDValue Combined = visitORLike(N0, N1, N))
6513     return Combined;
6514 
6515   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6516     return Combined;
6517 
6518   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6519   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6520     return BSwap;
6521   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6522     return BSwap;
6523 
6524   // reassociate or
6525   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6526     return ROR;
6527 
6528   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6529   // iff (c1 & c2) != 0 or c1/c2 are undef.
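  // E.g. (or (and X, 0x0f), 0x03) -> (and (or X, 0x03), 0x0f).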
6530   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6531     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6532   };
6533   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6534       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6535     if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6536                                                  {N1, N0.getOperand(1)})) {
6537       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6538       AddToWorklist(IOR.getNode());
6539       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6540     }
6541   }
6542 
6543   if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6544     return Combined;
6545   if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6546     return Combined;
6547 
6548   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
6549   if (N0.getOpcode() == N1.getOpcode())
6550     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6551       return V;
6552 
6553   // See if this is some rotate idiom.
6554   if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6555     return Rot;
6556 
6557   if (SDValue Load = MatchLoadCombine(N))
6558     return Load;
6559 
6560   // Simplify the operands using demanded-bits information.
6561   if (SimplifyDemandedBits(SDValue(N, 0)))
6562     return SDValue(N, 0);
6563 
6564   // If OR can be rewritten into ADD, try combines based on ADD.
6565   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6566       DAG.haveNoCommonBitsSet(N0, N1))
6567     if (SDValue Combined = visitADDLike(N))
6568       return Combined;
6569 
6570   return SDValue();
6571 }
6572 
6573 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6574   if (Op.getOpcode() == ISD::AND &&
6575       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6576     Mask = Op.getOperand(1);
6577     return Op.getOperand(0);
6578   }
6579   return Op;
6580 }
6581 
6582 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
6583 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6584                             SDValue &Mask) {
6585   Op = stripConstantMask(DAG, Op, Mask);
6586   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6587     Shift = Op;
6588     return true;
6589   }
6590   return false;
6591 }
6592 
6593 /// Helper function for visitOR to extract the needed side of a rotate idiom
6594 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
6595 /// InstCombine merged some outside op with one of the shifts from
6596 /// the rotate pattern.
6597 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6598 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
6599 /// patterns:
6600 ///
6601 ///   (or (add v v) (shrl v bitwidth-1)):
6602 ///     expands (add v v) -> (shl v 1)
6603 ///
6604 ///   (or (mul v c0) (shrl (mul v c1) c2)):
6605 ///     expands (mul v c0) -> (shl (mul v c1) c3)
6606 ///
6607 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
6608 ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
6609 ///
6610 ///   (or (shl v c0) (shrl (shl v c1) c2)):
6611 ///     expands (shl v c0) -> (shl (shl v c1) c3)
6612 ///
6613 ///   (or (shrl v c0) (shl (shrl v c1) c2)):
6614 ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
6615 ///
6616 /// Such that in all cases, c3+c2==bitwidth(op v c1).
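///
/// For example, with 32-bit values, (or (mul v 4) (shrl (mul v 2) 31))
/// expands (mul v 4) to (shl (mul v 2) 1), since 1 + 31 == 32.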
6617 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6618                                      SDValue ExtractFrom, SDValue &Mask,
6619                                      const SDLoc &DL) {
6620   assert(OppShift && ExtractFrom && "Empty SDValue");
6621   assert(
6622       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6623       "Existing shift must be valid as a rotate half");
6624 
6625   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6626 
6627   // Value and Type of the shift.
6628   SDValue OppShiftLHS = OppShift.getOperand(0);
6629   EVT ShiftedVT = OppShiftLHS.getValueType();
6630 
6631   // Amount of the existing shift.
6632   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6633 
6634   // (add v v) -> (shl v 1)
6635   // TODO: Should this be a general DAG canonicalization?
6636   if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6637       ExtractFrom.getOpcode() == ISD::ADD &&
6638       ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6639       ExtractFrom.getOperand(0) == OppShiftLHS &&
6640       OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6641     return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6642                        DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6643 
6644   // Preconditions:
6645   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6646   //
6647   // Find opcode of the needed shift to be extracted from (op0 v c0).
6648   unsigned Opcode = ISD::DELETED_NODE;
6649   bool IsMulOrDiv = false;
6650   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6651   // opcode or its arithmetic (mul or udiv) variant.
6652   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6653     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6654     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6655       return false;
6656     Opcode = NeededShift;
6657     return true;
6658   };
6659   // op0 must be either the needed shift opcode or the mul/udiv equivalent
6660   // that the needed shift can be extracted from.
6661   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6662       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6663     return SDValue();
6664 
6665   // op0 must be the same opcode on both sides, have the same LHS argument,
6666   // and produce the same value type.
6667   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6668       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6669       ShiftedVT != ExtractFrom.getValueType())
6670     return SDValue();
6671 
6672   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6673   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6674   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6675   ConstantSDNode *ExtractFromCst =
6676       isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these
  // values.
  // Check that we have constant values.
6679   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6680       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6681       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6682     return SDValue();
6683 
6684   // Compute the shift amount we need to extract to complete the rotate.
6685   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6686   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6687     return SDValue();
6688   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6689   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6690   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6691   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6692   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6693 
  // Now try to extract the needed shift from the ExtractFrom op and see if
  // the result matches up with the existing shift's LHS op.
6696   if (IsMulOrDiv) {
6697     // Op to extract from is a mul or udiv by a constant.
6698     // Check:
6699     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6700     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6701     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6702                                                  NeededShiftAmt.getZExtValue());
6703     APInt ResultAmt;
6704     APInt Rem;
6705     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6706     if (Rem != 0 || ResultAmt != OppLHSAmt)
6707       return SDValue();
6708   } else {
6709     // Op to extract from is a shift by a constant.
6710     // Check:
6711     //      c2 - (bitwidth(op0 v c0) - c1) == c0
6712     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6713                                           ExtractFromAmt.getBitWidth()))
6714       return SDValue();
6715   }
6716 
6717   // Return the expanded shift op that should allow a rotate to be formed.
6718   EVT ShiftVT = OppShift.getOperand(1).getValueType();
6719   EVT ResVT = ExtractFrom.getValueType();
6720   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6721   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6722 }
6723 
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with EltSize bits:
6727 //
6728 //     (or (shift1 X, Neg), (shift2 X, Pos))
6729 //
6730 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6731 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
6732 // to consider shift amounts with defined behavior.
6733 //
// The IsRotate flag should be set when the LHS of both shifts is the same;
// otherwise, when matching a general funnel shift, it should be clear.
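//
// E.g. the classic i32 variable-rotate pattern
//
//     (or (shl x, y), (srl x, (and (sub 0, y), 31)))
//
// reaches here with Pos == y and Neg == (and (sub 0, y), 31): the mask
// satisfies [A], so Neg is replaced by (sub 0, y); then NegC == 0 and
// Pos == NegOp1 give Width == 0, whose low 5 bits are 0 as required.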
6736 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6737                            SelectionDAG &DAG, bool IsRotate) {
6738   // If EltSize is a power of 2 then:
6739   //
6740   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6741   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6742   //
6743   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6744   // for the stronger condition:
6745   //
6746   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
6747   //
6748   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6749   // we can just replace Neg with Neg' for the rest of the function.
6750   //
6751   // In other cases we check for the even stronger condition:
6752   //
6753   //     Neg == EltSize - Pos                                    [B]
6754   //
6755   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
6756   // behavior if Pos == 0 (and consequently Neg == EltSize).
6757   //
6758   // We could actually use [A] whenever EltSize is a power of 2, but the
6759   // only extra cases that it would match are those uninteresting ones
6760   // where Neg and Pos are never in range at the same time.  E.g. for
6761   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6762   // as well as (sub 32, Pos), but:
6763   //
6764   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6765   //
6766   // always invokes undefined behavior for 32-bit X.
6767   //
6768   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6769   //
  // NOTE: We can only use the [A] relaxation when matching a rotate (both
  // shifts use the same LHS); for a general funnel shift it is not valid.
6772   unsigned MaskLoBits = 0;
6773   if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6774     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6775       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6776       unsigned Bits = Log2_64(EltSize);
6777       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6778           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6779         Neg = Neg.getOperand(0);
6780         MaskLoBits = Bits;
6781       }
6782     }
6783   }
6784 
6785   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6786   if (Neg.getOpcode() != ISD::SUB)
6787     return false;
6788   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6789   if (!NegC)
6790     return false;
6791   SDValue NegOp1 = Neg.getOperand(1);
6792 
6793   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6794   // Pos'.  The truncation is redundant for the purpose of the equality.
6795   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6796     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6797       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6798       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6799           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6800            MaskLoBits))
6801         Pos = Pos.getOperand(0);
6802     }
6803   }
6804 
6805   // The condition we need is now:
6806   //
6807   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6808   //
6809   // If NegOp1 == Pos then we need:
6810   //
6811   //              EltSize & Mask == NegC & Mask
6812   //
6813   // (because "x & Mask" is a truncation and distributes through subtraction).
6814   //
6815   // We also need to account for a potential truncation of NegOp1 if the amount
6816   // has already been legalized to a shift amount type.
6817   APInt Width;
6818   if ((Pos == NegOp1) ||
6819       (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6820     Width = NegC->getAPIntValue();
6821 
6822   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6823   // Then the condition we want to prove becomes:
6824   //
6825   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6826   //
6827   // which, again because "x & Mask" is a truncation, becomes:
6828   //
6829   //                NegC & Mask == (EltSize - PosC) & Mask
6830   //             EltSize & Mask == (NegC + PosC) & Mask
6831   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6832     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6833       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6834     else
6835       return false;
6836   } else
6837     return false;
6838 
6839   // Now we just need to check that EltSize & Mask == Width & Mask.
6840   if (MaskLoBits)
6841     // EltSize & Mask is 0 since Mask is EltSize - 1.
6842     return Width.getLoBits(MaskLoBits) == 0;
6843   return Width == EltSize;
6844 }
6845 
6846 // A subroutine of MatchRotate used once we have found an OR of two opposite
6847 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
6848 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6849 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6850 // Neg with outer conversions stripped away.
6851 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6852                                        SDValue Neg, SDValue InnerPos,
6853                                        SDValue InnerNeg, unsigned PosOpcode,
6854                                        unsigned NegOpcode, const SDLoc &DL) {
6855   // fold (or (shl x, (*ext y)),
6856   //          (srl x, (*ext (sub 32, y)))) ->
6857   //   (rotl x, y) or (rotr x, (sub 32, y))
6858   //
6859   // fold (or (shl x, (*ext (sub 32, y))),
6860   //          (srl x, (*ext y))) ->
6861   //   (rotr x, y) or (rotl x, (sub 32, y))
6862   EVT VT = Shifted.getValueType();
6863   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
6864                      /*IsRotate*/ true)) {
6865     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6866     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6867                        HasPos ? Pos : Neg);
6868   }
6869 
6870   return SDValue();
6871 }
6872 
6873 // A subroutine of MatchRotate used once we have found an OR of two opposite
6874 // shifts of N0 + N1.  If Neg == <operand size> - Pos then the OR reduces
6875 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
6876 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6877 // Neg with outer conversions stripped away.
6878 // TODO: Merge with MatchRotatePosNeg.
6879 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
6880                                        SDValue Neg, SDValue InnerPos,
6881                                        SDValue InnerNeg, unsigned PosOpcode,
6882                                        unsigned NegOpcode, const SDLoc &DL) {
6883   EVT VT = N0.getValueType();
6884   unsigned EltBits = VT.getScalarSizeInBits();
6885 
6886   // fold (or (shl x0, (*ext y)),
6887   //          (srl x1, (*ext (sub 32, y)))) ->
6888   //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
6889   //
6890   // fold (or (shl x0, (*ext (sub 32, y))),
6891   //          (srl x1, (*ext y))) ->
6892   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6893   if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
6894     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6895     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
6896                        HasPos ? Pos : Neg);
6897   }
6898 
6899   // Matching the shift+xor cases, we can't easily use the xor'd shift amount
  // so for now just use the PosOpcode case if it's legal.
6901   // TODO: When can we use the NegOpcode case?
6902   if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
6903     auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
6904       if (Op.getOpcode() != BinOpc)
6905         return false;
6906       ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
6907       return Cst && (Cst->getAPIntValue() == Imm);
6908     };
6909 
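    // These folds rely on the identity (xor y, EltBits-1) == (EltBits-1) - y
    // for y in [0, EltBits), which holds because EltBits is a power of 2.
    // E.g. for i32, (srl (srl x1, 1), (xor y, 31)) shifts x1 right by a total
    // of 1 + (31 - y) == 32 - y bits, exactly the srl half of a fshl by y.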
6910     // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
6911     //   -> (fshl x0, x1, y)
6912     if (IsBinOpImm(N1, ISD::SRL, 1) &&
6913         IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
6914         InnerPos == InnerNeg.getOperand(0) &&
6915         TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
6916       return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
6917     }
6918 
6919     // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
6920     //   -> (fshr x0, x1, y)
6921     if (IsBinOpImm(N0, ISD::SHL, 1) &&
6922         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6923         InnerNeg == InnerPos.getOperand(0) &&
6924         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6925       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6926     }
6927 
6928     // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
6929     //   -> (fshr x0, x1, y)
6930     // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
6931     if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
6932         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6933         InnerNeg == InnerPos.getOperand(0) &&
6934         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6935       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6936     }
6937   }
6938 
6939   return SDValue();
6940 }
6941 
6942 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
6943 // idioms for rotate, and if the target supports rotation instructions, generate
6944 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
6945 // with different shifted sources.
6946 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type. Expanded and promoted types won't work with
  // rotates.
6948   EVT VT = LHS.getValueType();
6949   if (!TLI.isTypeLegal(VT))
6950     return SDValue();
6951 
6952   // The target must have at least one rotate/funnel flavor.
6953   bool HasROTL = hasOperation(ISD::ROTL, VT);
6954   bool HasROTR = hasOperation(ISD::ROTR, VT);
6955   bool HasFSHL = hasOperation(ISD::FSHL, VT);
6956   bool HasFSHR = hasOperation(ISD::FSHR, VT);
6957   if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
6958     return SDValue();
6959 
6960   // Check for truncated rotate.
6961   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6962       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6963     assert(LHS.getValueType() == RHS.getValueType());
6964     if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6965       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6966     }
6967   }
6968 
6969   // Match "(X shl/srl V1) & V2" where V2 may not be present.
6970   SDValue LHSShift;   // The shift.
6971   SDValue LHSMask;    // AND value if any.
6972   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6973 
6974   SDValue RHSShift;   // The shift.
6975   SDValue RHSMask;    // AND value if any.
6976   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6977 
6978   // If neither side matched a rotate half, bail
6979   if (!LHSShift && !RHSShift)
6980     return SDValue();
6981 
6982   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6983   // side of the rotate, so try to handle that here. In all cases we need to
6984   // pass the matched shift from the opposite side to compute the opcode and
6985   // needed shift amount to extract.  We still want to do this if both sides
6986   // matched a rotate half because one half may be a potential overshift that
  // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
6988   // single one).
6989 
6990   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6991   if (LHSShift)
6992     if (SDValue NewRHSShift =
6993             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6994       RHSShift = NewRHSShift;
6995   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6996   if (RHSShift)
6997     if (SDValue NewLHSShift =
6998             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6999       LHSShift = NewLHSShift;
7000 
  // If a side is still missing, there is nothing else we can do.
7002   if (!RHSShift || !LHSShift)
7003     return SDValue();
7004 
7005   // At this point we've matched or extracted a shift op on each side.
7006 
7007   if (LHSShift.getOpcode() == RHSShift.getOpcode())
7008     return SDValue(); // Shifts must disagree.
7009 
7010   bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
7011   if (!IsRotate && !(HasFSHL || HasFSHR))
7012     return SDValue(); // Requires funnel shift support.
7013 
7014   // Canonicalize shl to left side in a shl/srl pair.
7015   if (RHSShift.getOpcode() == ISD::SHL) {
7016     std::swap(LHS, RHS);
7017     std::swap(LHSShift, RHSShift);
7018     std::swap(LHSMask, RHSMask);
7019   }
7020 
7021   unsigned EltSizeInBits = VT.getScalarSizeInBits();
7022   SDValue LHSShiftArg = LHSShift.getOperand(0);
7023   SDValue LHSShiftAmt = LHSShift.getOperand(1);
7024   SDValue RHSShiftArg = RHSShift.getOperand(0);
7025   SDValue RHSShiftAmt = RHSShift.getOperand(1);
7026 
7027   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
7028   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
7029   // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
7030   // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
7031   // iff C1+C2 == EltSizeInBits
7032   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
7033                                         ConstantSDNode *RHS) {
7034     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
7035   };
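  // E.g. for i32: (or (shl x, 8), (srl x, 24)) -> (rotl x, 8) or
  // (rotr x, 24), since 8 + 24 == 32.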
7036   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7037     SDValue Res;
7038     if (IsRotate && (HasROTL || HasROTR))
7039       Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
7040                         HasROTL ? LHSShiftAmt : RHSShiftAmt);
7041     else
7042       Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
7043                         RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
7044 
7045     // If there is an AND of either shifted operand, apply it to the result.
7046     if (LHSMask.getNode() || RHSMask.getNode()) {
7047       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
7048       SDValue Mask = AllOnes;
7049 
7050       if (LHSMask.getNode()) {
7051         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
7052         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7053                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
7054       }
7055       if (RHSMask.getNode()) {
7056         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7057         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7058                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7059       }
7060 
7061       Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7062     }
7063 
7064     return Res;
7065   }
7066 
  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right bits.
7069   if (LHSMask.getNode() || RHSMask.getNode())
7070     return SDValue();
7071 
  // If the shift amount is sign/zext/any-extended or truncated, just peel it
  // off.
7073   SDValue LExtOp0 = LHSShiftAmt;
7074   SDValue RExtOp0 = RHSShiftAmt;
7075   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7076        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7077        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7078        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7079       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7080        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7081        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7082        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7083     LExtOp0 = LHSShiftAmt.getOperand(0);
7084     RExtOp0 = RHSShiftAmt.getOperand(0);
7085   }
7086 
7087   if (IsRotate && (HasROTL || HasROTR)) {
7088     SDValue TryL =
7089         MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
7090                           RExtOp0, ISD::ROTL, ISD::ROTR, DL);
7091     if (TryL)
7092       return TryL;
7093 
7094     SDValue TryR =
7095         MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
7096                           LExtOp0, ISD::ROTR, ISD::ROTL, DL);
7097     if (TryR)
7098       return TryR;
7099   }
7100 
7101   SDValue TryL =
7102       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7103                         LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
7104   if (TryL)
7105     return TryL;
7106 
7107   SDValue TryR =
7108       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7109                         RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
7110   if (TryR)
7111     return TryR;
7112 
7113   return SDValue();
7114 }
7115 
7116 namespace {
7117 
7118 /// Represents known origin of an individual byte in load combine pattern. The
7119 /// value of the byte is either constant zero or comes from memory.
7120 struct ByteProvider {
7121   // For constant zero providers Load is set to nullptr. For memory providers
7122   // Load represents the node which loads the byte from memory.
7123   // ByteOffset is the offset of the byte in the value produced by the load.
7124   LoadSDNode *Load = nullptr;
7125   unsigned ByteOffset = 0;
7126 
7127   ByteProvider() = default;
7128 
7129   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7130     return ByteProvider(Load, ByteOffset);
7131   }
7132 
7133   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7134 
7135   bool isConstantZero() const { return !Load; }
7136   bool isMemory() const { return Load; }
7137 
7138   bool operator==(const ByteProvider &Other) const {
7139     return Other.Load == Load && Other.ByteOffset == ByteOffset;
7140   }
7141 
7142 private:
7143   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7144       : Load(Load), ByteOffset(ByteOffset) {}
7145 };
7146 
7147 } // end anonymous namespace
7148 
7149 /// Recursively traverses the expression calculating the origin of the requested
7150 /// byte of the given value. Returns None if the provider can't be calculated.
7151 ///
/// For all values except the root of the expression, this verifies that the
/// value has exactly one use; if that does not hold, it returns None. This way,
/// if the origin of the byte is returned, it is guaranteed that the values
/// which contribute to the byte are not used outside of this expression.
7156 ///
7157 /// Because the parts of the expression are not allowed to have more than one
7158 /// use this function iterates over trees, not DAGs. So it never visits the same
7159 /// node more than once.
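///
/// For illustration, a hypothetical pattern: given the i32 value
///   (or (zext (load i8 p)) (shl (zext (load i8 q)) 8)),
/// byte 0 is provided by the load from p, byte 1 by the load from q, and
/// bytes 2 and 3 are constant zero thanks to the zero-extends.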
7160 static const Optional<ByteProvider>
7161 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7162                       bool Root = false) {
  // A typical i64-by-i8 pattern requires recursion up to 8 calls deep.
7164   if (Depth == 10)
7165     return None;
7166 
7167   if (!Root && !Op.hasOneUse())
7168     return None;
7169 
7170   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7171   unsigned BitWidth = Op.getValueSizeInBits();
7172   if (BitWidth % 8 != 0)
7173     return None;
7174   unsigned ByteWidth = BitWidth / 8;
7175   assert(Index < ByteWidth && "invalid index requested");
7176   (void) ByteWidth;
7177 
7178   switch (Op.getOpcode()) {
7179   case ISD::OR: {
7180     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7181     if (!LHS)
7182       return None;
7183     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7184     if (!RHS)
7185       return None;
7186 
7187     if (LHS->isConstantZero())
7188       return RHS;
7189     if (RHS->isConstantZero())
7190       return LHS;
7191     return None;
7192   }
7193   case ISD::SHL: {
7194     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7195     if (!ShiftOp)
7196       return None;
7197 
7198     uint64_t BitShift = ShiftOp->getZExtValue();
7199     if (BitShift % 8 != 0)
7200       return None;
7201     uint64_t ByteShift = BitShift / 8;
7202 
7203     return Index < ByteShift
7204                ? ByteProvider::getConstantZero()
7205                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7206                                        Depth + 1);
7207   }
7208   case ISD::ANY_EXTEND:
7209   case ISD::SIGN_EXTEND:
7210   case ISD::ZERO_EXTEND: {
7211     SDValue NarrowOp = Op->getOperand(0);
7212     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7213     if (NarrowBitWidth % 8 != 0)
7214       return None;
7215     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7216 
7217     if (Index >= NarrowByteWidth)
7218       return Op.getOpcode() == ISD::ZERO_EXTEND
7219                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7220                  : None;
7221     return calculateByteProvider(NarrowOp, Index, Depth + 1);
7222   }
7223   case ISD::BSWAP:
7224     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7225                                  Depth + 1);
7226   case ISD::LOAD: {
7227     auto L = cast<LoadSDNode>(Op.getNode());
7228     if (!L->isSimple() || L->isIndexed())
7229       return None;
7230 
7231     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7232     if (NarrowBitWidth % 8 != 0)
7233       return None;
7234     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7235 
7236     if (Index >= NarrowByteWidth)
7237       return L->getExtensionType() == ISD::ZEXTLOAD
7238                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7239                  : None;
7240     return ByteProvider::getMemory(L, Index);
7241   }
7242   }
7243 
7244   return None;
7245 }
7246 
7247 static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
7248   return i;
7249 }
7250 
7251 static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
7252   return BW - i - 1;
7253 }
7254 
// Check if the byte offsets we are looking at match either a big- or
// little-endian value load. Returns true for big endian, false for little
// endian, and None if the match failed.
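// E.g. with FirstOffset == 0, ByteOffsets of {0, 1, 2, 3} match a
// little-endian load (returns false) and {3, 2, 1, 0} a big-endian load
// (returns true).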
7258 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7259                                   int64_t FirstOffset) {
7260   // The endian can be decided only when it is 2 bytes at least.
7261   unsigned Width = ByteOffsets.size();
7262   if (Width < 2)
7263     return None;
7264 
7265   bool BigEndian = true, LittleEndian = true;
7266   for (unsigned i = 0; i < Width; i++) {
7267     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7268     LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7269     BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7270     if (!BigEndian && !LittleEndian)
7271       return None;
7272   }
7273 
  assert((BigEndian != LittleEndian) && "It should be either big endian or "
                                        "little endian");
7276   return BigEndian;
7277 }
7278 
7279 static SDValue stripTruncAndExt(SDValue Value) {
7280   switch (Value.getOpcode()) {
7281   case ISD::TRUNCATE:
7282   case ISD::ZERO_EXTEND:
7283   case ISD::SIGN_EXTEND:
7284   case ISD::ANY_EXTEND:
7285     return stripTruncAndExt(Value.getOperand(0));
7286   }
7287   return Value;
7288 }
7289 
7290 /// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the target
/// supports it.
7293 ///
7294 /// Assuming little endian target:
7295 ///  i8 *p = ...
7296 ///  i32 val = ...
7297 ///  p[0] = (val >> 0) & 0xFF;
7298 ///  p[1] = (val >> 8) & 0xFF;
7299 ///  p[2] = (val >> 16) & 0xFF;
7300 ///  p[3] = (val >> 24) & 0xFF;
7301 /// =>
7302 ///  *((i32)p) = val;
7303 ///
7304 ///  i8 *p = ...
7305 ///  i32 val = ...
7306 ///  p[0] = (val >> 24) & 0xFF;
7307 ///  p[1] = (val >> 16) & 0xFF;
7308 ///  p[2] = (val >> 8) & 0xFF;
7309 ///  p[3] = (val >> 0) & 0xFF;
7310 /// =>
7311 ///  *((i32)p) = BSWAP(val);
7312 SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7313   // The matching looks for "store (trunc x)" patterns that appear early but are
7314   // likely to be replaced by truncating store nodes during combining.
7315   // TODO: If there is evidence that running this later would help, this
7316   //       limitation could be removed. Legality checks may need to be added
7317   //       for the created store and optional bswap/rotate.
7318   if (LegalOperations)
7319     return SDValue();
7320 
7321   // We only handle merging simple stores of 1-4 bytes.
7322   // TODO: Allow unordered atomics when wider type is legal (see D66309)
7323   EVT MemVT = N->getMemoryVT();
7324   if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7325       !N->isSimple() || N->isIndexed())
7326     return SDValue();
7327 
7328   // Collect all of the stores in the chain.
7329   SDValue Chain = N->getChain();
7330   SmallVector<StoreSDNode *, 8> Stores = {N};
7331   while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
7332     // All stores must be the same size to ensure that we are writing all of the
7333     // bytes in the wide value.
7334     // TODO: We could allow multiple sizes by tracking each stored byte.
7335     if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
7336         Store->isIndexed())
7337       return SDValue();
7338     Stores.push_back(Store);
7339     Chain = Store->getChain();
7340   }
7341   // There is no reason to continue if we do not have at least a pair of stores.
7342   if (Stores.size() < 2)
7343     return SDValue();
7344 
7345   // Handle simple types only.
7346   LLVMContext &Context = *DAG.getContext();
7347   unsigned NumStores = Stores.size();
7348   unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7349   unsigned WideNumBits = NumStores * NarrowNumBits;
7350   EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7351   if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7352     return SDValue();
7353 
7354   // Check if all bytes of the source value that we are looking at are stored
7355   // to the same base address. Collect offsets from Base address into OffsetMap.
7356   SDValue SourceValue;
7357   SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7358   int64_t FirstOffset = INT64_MAX;
7359   StoreSDNode *FirstStore = nullptr;
7360   Optional<BaseIndexOffset> Base;
7361   for (auto Store : Stores) {
    // All the stores store different parts of the combined wide value. A
    // truncate is required to get the partial value.
7364     SDValue Trunc = Store->getValue();
7365     if (Trunc.getOpcode() != ISD::TRUNCATE)
7366       return SDValue();
7367     // Other than the first/last part, a shift operation is required to get the
7368     // offset.
7369     int64_t Offset = 0;
7370     SDValue WideVal = Trunc.getOperand(0);
7371     if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7372         isa<ConstantSDNode>(WideVal.getOperand(1))) {
7373       // The shift amount must be a constant multiple of the narrow type.
7374       // It is translated to the offset address in the wide source value "y".
7375       //
7376       // x = srl y, ShiftAmtC
7377       // i8 z = trunc x
7378       // store z, ...
7379       uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7380       if (ShiftAmtC % NarrowNumBits != 0)
7381         return SDValue();
7382 
7383       Offset = ShiftAmtC / NarrowNumBits;
7384       WideVal = WideVal.getOperand(0);
7385     }
7386 
7387     // Stores must share the same source value with different offsets.
    // Truncates and extends should be stripped to get the single source value.
7389     if (!SourceValue)
7390       SourceValue = WideVal;
7391     else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7392       return SDValue();
7393     else if (SourceValue.getValueType() != WideVT) {
7394       if (WideVal.getValueType() == WideVT ||
7395           WideVal.getScalarValueSizeInBits() >
7396               SourceValue.getScalarValueSizeInBits())
7397         SourceValue = WideVal;
7398       // Give up if the source value type is smaller than the store size.
7399       if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7400         return SDValue();
7401     }
7402 
7403     // Stores must share the same base address.
7404     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7405     int64_t ByteOffsetFromBase = 0;
7406     if (!Base)
7407       Base = Ptr;
7408     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7409       return SDValue();
7410 
7411     // Remember the first store.
7412     if (ByteOffsetFromBase < FirstOffset) {
7413       FirstStore = Store;
7414       FirstOffset = ByteOffsetFromBase;
7415     }
    // Map the offset in the store to the offset in the combined value, and
    // return early if this offset has already been set.
7418     if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7419       return SDValue();
7420     OffsetMap[Offset] = ByteOffsetFromBase;
7421   }
7422 
7423   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7424   assert(FirstStore && "First store must be set");
7425 
7426   // Check that a store of the wide type is both allowed and fast on the target
7427   const DataLayout &Layout = DAG.getDataLayout();
7428   bool Fast = false;
7429   bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7430                                         *FirstStore->getMemOperand(), &Fast);
7431   if (!Allowed || !Fast)
7432     return SDValue();
7433 
7434   // Check if the pieces of the value are going to the expected places in memory
7435   // to merge the stores.
7436   auto checkOffsets = [&](bool MatchLittleEndian) {
7437     if (MatchLittleEndian) {
7438       for (unsigned i = 0; i != NumStores; ++i)
7439         if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7440           return false;
7441     } else { // MatchBigEndian by reversing loop counter.
7442       for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7443         if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7444           return false;
7445     }
7446     return true;
7447   };
7448 
7449   // Check if the offsets line up for the native data layout of this target.
7450   bool NeedBswap = false;
7451   bool NeedRotate = false;
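  // E.g. on a little-endian target, two i16 stores that place the high half
  // of an i32 at the lower address can be fixed with (rotr val, 16); a bswap
  // would additionally swap the bytes within each i16 half.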
7452   if (!checkOffsets(Layout.isLittleEndian())) {
7453     // Special-case: check if byte offsets line up for the opposite endian.
7454     if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7455       NeedBswap = true;
7456     else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7457       NeedRotate = true;
7458     else
7459       return SDValue();
7460   }
7461 
7462   SDLoc DL(N);
7463   if (WideVT != SourceValue.getValueType()) {
7464     assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7465            "Unexpected store value to merge");
7466     SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7467   }
7468 
  // Before legalize we can introduce illegal bswaps/rotates which will later
  // be converted to an explicit bswap sequence. This way we end up with a
  // single store and byte shuffling instead of several stores and byte
  // shuffling.
7472   if (NeedBswap) {
7473     SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7474   } else if (NeedRotate) {
7475     assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7476     SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7477     SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7478   }
7479 
7480   SDValue NewStore =
7481       DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7482                    FirstStore->getPointerInfo(), FirstStore->getAlign());
7483 
7484   // Rely on other DAG combine rules to remove the other individual stores.
7485   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7486   return NewStore;
7487 }
7488 
7489 /// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
7492 ///
7493 /// Assuming little endian target:
7494 ///  i8 *a = ...
7495 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7496 /// =>
7497 ///  i32 val = *((i32)a)
7498 ///
7499 ///  i8 *a = ...
7500 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7501 /// =>
7502 ///  i32 val = BSWAP(*((i32)a))
7503 ///
7504 /// TODO: This rule matches complex patterns with OR node roots and doesn't
7505 /// interact well with the worklist mechanism. When a part of the pattern is
7506 /// updated (e.g. one of the loads) its direct users are put into the worklist,
7507 /// but the root node of the pattern which triggers the load combine is not
7508 /// necessarily a direct user of the changed node. For example, once the address
7509 /// of t28 load is reassociated load combine won't be triggered:
7510 ///             t25: i32 = add t4, Constant:i32<2>
7511 ///           t26: i64 = sign_extend t25
7512 ///        t27: i64 = add t2, t26
7513 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7514 ///     t29: i32 = zero_extend t28
7515 ///   t32: i32 = shl t29, Constant:i8<8>
7516 /// t33: i32 = or t23, t32
7517 /// As a possible fix visitLoad can check if the load can be a part of a load
7518 /// combine pattern and add corresponding OR roots to the worklist.
7519 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7520   assert(N->getOpcode() == ISD::OR &&
7521          "Can only match load combining against OR nodes");
7522 
  // Handle simple types only.
7524   EVT VT = N->getValueType(0);
7525   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7526     return SDValue();
7527   unsigned ByteWidth = VT.getSizeInBits() / 8;
7528 
7529   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7530   auto MemoryByteOffset = [&] (ByteProvider P) {
7531     assert(P.isMemory() && "Must be a memory byte provider");
7532     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes, not bits");
7535     unsigned LoadByteWidth = LoadBitWidth / 8;
7536     return IsBigEndianTarget
7537             ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7538             : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7539   };
7540 
7541   Optional<BaseIndexOffset> Base;
7542   SDValue Chain;
7543 
7544   SmallPtrSet<LoadSDNode *, 8> Loads;
7545   Optional<ByteProvider> FirstByteProvider;
7546   int64_t FirstOffset = INT64_MAX;
7547 
  // Check if all the bytes of the OR we are looking at are loaded from the
  // same base address. Collect the byte offsets from the Base address in
  // ByteOffsets.
7550   SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7551   unsigned ZeroExtendedBytes = 0;
7552   for (int i = ByteWidth - 1; i >= 0; --i) {
7553     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7554     if (!P)
7555       return SDValue();
7556 
7557     if (P->isConstantZero()) {
      // It's OK for the N most significant bytes to be 0; we can just
      // zero-extend the load.
7560       if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7561         return SDValue();
7562       continue;
7563     }
7564     assert(P->isMemory() && "provenance should either be memory or zero");
7565 
7566     LoadSDNode *L = P->Load;
7567     assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7568            !L->isIndexed() &&
7569            "Must be enforced by calculateByteProvider");
7570     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7571 
7572     // All loads must share the same chain
7573     SDValue LChain = L->getChain();
7574     if (!Chain)
7575       Chain = LChain;
7576     else if (Chain != LChain)
7577       return SDValue();
7578 
7579     // Loads must share the same base address
7580     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7581     int64_t ByteOffsetFromBase = 0;
7582     if (!Base)
7583       Base = Ptr;
7584     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7585       return SDValue();
7586 
7587     // Calculate the offset of the current byte from the base address
7588     ByteOffsetFromBase += MemoryByteOffset(*P);
7589     ByteOffsets[i] = ByteOffsetFromBase;
7590 
7591     // Remember the first byte load
7592     if (ByteOffsetFromBase < FirstOffset) {
7593       FirstByteProvider = P;
7594       FirstOffset = ByteOffsetFromBase;
7595     }
7596 
7597     Loads.insert(L);
7598   }
7599   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7600          "memory, so there must be at least one load which produces the value");
7601   assert(Base && "Base address of the accessed memory location must be set");
7602   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7603 
7604   bool NeedsZext = ZeroExtendedBytes > 0;
7605 
7606   EVT MemVT =
7607       EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7608 
7609   if (!MemVT.isSimple())
7610     return SDValue();
7611 
  // Before legalize we can introduce loads that are illegally wide; these
  // will later be split into legally sized loads. This enables us to combine
  // i64-by-i8 load patterns into a couple of i32 loads on 32-bit targets.
7615   if (LegalOperations &&
7616       !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7617                             MemVT))
7618     return SDValue();
7619 
  // Check if the bytes of the OR we are looking at match either a big or
  // little endian value load.
7622   Optional<bool> IsBigEndian = isBigEndian(
7623       makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7624   if (!IsBigEndian.hasValue())
7625     return SDValue();
7626 
7627   assert(FirstByteProvider && "must be set");
7628 
  // Ensure that the first byte is loaded from offset zero of the first load,
  // so that the combined value can be loaded from the first load's address.
7631   if (MemoryByteOffset(*FirstByteProvider) != 0)
7632     return SDValue();
7633   LoadSDNode *FirstLoad = FirstByteProvider->Load;
7634 
7635   // The node we are looking at matches with the pattern, check if we can
7636   // replace it with a single (possibly zero-extended) load and bswap + shift if
7637   // needed.
7638 
  // If the load needs a byte swap, check if the target supports it.
7640   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7641 
  // Before legalize we can introduce illegal bswaps which will later be
  // converted to an explicit bswap sequence. This way we end up with a single
  // load and byte shuffling instead of several loads and byte shuffling.
7645   // We do not introduce illegal bswaps when zero-extending as this tends to
7646   // introduce too many arithmetic instructions.
7647   if (NeedsBswap && (LegalOperations || NeedsZext) &&
7648       !TLI.isOperationLegal(ISD::BSWAP, VT))
7649     return SDValue();
7650 
7651   // If we need to bswap and zero extend, we have to insert a shift. Check that
7652   // it is legal.
7653   if (NeedsBswap && NeedsZext && LegalOperations &&
7654       !TLI.isOperationLegal(ISD::SHL, VT))
7655     return SDValue();
7656 
7657   // Check that a load of the wide type is both allowed and fast on the target
7658   bool Fast = false;
7659   bool Allowed =
7660       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7661                              *FirstLoad->getMemOperand(), &Fast);
7662   if (!Allowed || !Fast)
7663     return SDValue();
7664 
7665   SDValue NewLoad =
7666       DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
7667                      Chain, FirstLoad->getBasePtr(),
7668                      FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
7669 
7670   // Transfer chain users from old loads to the new load.
7671   for (LoadSDNode *L : Loads)
7672     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7673 
7674   if (!NeedsBswap)
7675     return NewLoad;
7676 
7677   SDValue ShiftedLoad =
7678       NeedsZext
7679           ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7680                         DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7681                                                    SDLoc(N), LegalOperations))
7682           : NewLoad;
7683   return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7684 }
7685 
7686 // If the target has andn, bsl, or a similar bit-select instruction,
// we want to unfold the masked merge, whose canonical pattern is:
7688 //   |        A  |  |B|
7689 //   ((x ^ y) & m) ^ y
7690 //    |  D  |
7691 // Into:
7692 //   (x & m) | (y & ~m)
7693 // If y is a constant, and the 'andn' does not work with immediates,
7694 // we unfold into a different pattern:
7695 //   ~(~x & m) & (m | y)
7696 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7697 //       the very least that breaks andnpd / andnps patterns, and because those
//       patterns are simplified in IR and shouldn't be created in the DAG.
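//
// The unfold is sound bit by bit: where m == 1 both forms produce x, and
// where m == 0 both forms produce y.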
7699 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7700   assert(N->getOpcode() == ISD::XOR);
7701 
7702   // Don't touch 'not' (i.e. where y = -1).
7703   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7704     return SDValue();
7705 
7706   EVT VT = N->getValueType(0);
7707 
7708   // There are 3 commutable operators in the pattern,
7709   // so we have to deal with 8 possible variants of the basic pattern.
7710   SDValue X, Y, M;
7711   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7712     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7713       return false;
7714     SDValue Xor = And.getOperand(XorIdx);
7715     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7716       return false;
7717     SDValue Xor0 = Xor.getOperand(0);
7718     SDValue Xor1 = Xor.getOperand(1);
7719     // Don't touch 'not' (i.e. where y = -1).
7720     if (isAllOnesOrAllOnesSplat(Xor1))
7721       return false;
7722     if (Other == Xor0)
7723       std::swap(Xor0, Xor1);
7724     if (Other != Xor1)
7725       return false;
7726     X = Xor0;
7727     Y = Xor1;
7728     M = And.getOperand(XorIdx ? 0 : 1);
7729     return true;
7730   };
7731 
7732   SDValue N0 = N->getOperand(0);
7733   SDValue N1 = N->getOperand(1);
7734   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7735       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7736     return SDValue();
7737 
7738   // Don't do anything if the mask is constant. This should not be reachable.
7739   // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it either.
7741   if (isa<ConstantSDNode>(M.getNode()))
7742     return SDValue();
7743 
  // We can transform this only if the target has AndNot.
7745   if (!TLI.hasAndNot(M))
7746     return SDValue();
7747 
7748   SDLoc DL(N);
7749 
7750   // If Y is a constant, check that 'andn' works with immediates.
7751   if (!TLI.hasAndNot(Y)) {
7752     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7753     // If not, we need to do a bit more work to make sure andn is still used.
7754     SDValue NotX = DAG.getNOT(DL, X, VT);
7755     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7756     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7757     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7758     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7759   }
7760 
7761   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7762   SDValue NotM = DAG.getNOT(DL, M, VT);
7763   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7764 
7765   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7766 }
7767 
7768 SDValue DAGCombiner::visitXOR(SDNode *N) {
7769   SDValue N0 = N->getOperand(0);
7770   SDValue N1 = N->getOperand(1);
7771   EVT VT = N0.getValueType();
7772 
7773   // fold vector ops
7774   if (VT.isVector()) {
7775     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7776       return FoldedVOp;
7777 
7778     // fold (xor x, 0) -> x, vector edition
7779     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
7780       return N1;
7781     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7782       return N0;
7783   }
7784 
7785   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7786   SDLoc DL(N);
7787   if (N0.isUndef() && N1.isUndef())
7788     return DAG.getConstant(0, DL, VT);
7789 
7790   // fold (xor x, undef) -> undef
7791   if (N0.isUndef())
7792     return N0;
7793   if (N1.isUndef())
7794     return N1;
7795 
7796   // fold (xor c1, c2) -> c1^c2
7797   if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7798     return C;
7799 
7800   // canonicalize constant to RHS
7801   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7802      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7803     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7804 
7805   // fold (xor x, 0) -> x
7806   if (isNullConstant(N1))
7807     return N0;
7808 
7809   if (SDValue NewSel = foldBinOpIntoSelect(N))
7810     return NewSel;
7811 
7812   // reassociate xor
7813   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7814     return RXOR;
7815 
7816   // fold !(x cc y) -> (x !cc y)
7817   unsigned N0Opcode = N0.getOpcode();
7818   SDValue LHS, RHS, CC;
7819   if (TLI.isConstTrueVal(N1.getNode()) &&
7820       isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
7821     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7822                                                LHS.getValueType());
7823     if (!LegalOperations ||
7824         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7825       switch (N0Opcode) {
7826       default:
7827         llvm_unreachable("Unhandled SetCC Equivalent!");
7828       case ISD::SETCC:
7829         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7830       case ISD::SELECT_CC:
7831         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7832                                N0.getOperand(3), NotCC);
7833       case ISD::STRICT_FSETCC:
7834       case ISD::STRICT_FSETCCS: {
7835         if (N0.hasOneUse()) {
7836           // FIXME Can we handle multiple uses? Could we token factor the chain
7837           // results from the new/old setcc?
7838           SDValue SetCC =
7839               DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7840                            N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
7841           CombineTo(N, SetCC);
7842           DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7843           recursivelyDeleteUnusedNodes(N0.getNode());
7844           return SDValue(N, 0); // Return N so it doesn't get rechecked!
7845         }
7846         break;
7847       }
7848       }
7849     }
7850   }
7851 
7852   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7853   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7854       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7855     SDValue V = N0.getOperand(0);
7856     SDLoc DL0(N0);
7857     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7858                     DAG.getConstant(1, DL0, V.getValueType()));
7859     AddToWorklist(V.getNode());
7860     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7861   }
7862 
7863   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
7864   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7865       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7866     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7867     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
7868       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7869       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7870       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7871       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7872       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7873     }
7874   }
7875   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
7876   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7877       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7878     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7879     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
7880       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7881       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7882       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7883       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7884       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7885     }
7886   }
7887 
7888   // fold (not (neg x)) -> (add X, -1)
7889   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7890   // Y is a constant or the subtract has a single use.
7891   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7892       isNullConstant(N0.getOperand(0))) {
7893     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7894                        DAG.getAllOnesConstant(DL, VT));
7895   }
7896 
7897   // fold (not (add X, -1)) -> (neg X)
7898   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
7899       isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
7900     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7901                        N0.getOperand(0));
7902   }
7903 
7904   // fold (xor (and x, y), y) -> (and (not x), y)
7905   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7906     SDValue X = N0.getOperand(0);
7907     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7908     AddToWorklist(NotX.getNode());
7909     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7910   }
7911 
7912   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7913     ConstantSDNode *XorC = isConstOrConstSplat(N1);
7914     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7915     unsigned BitWidth = VT.getScalarSizeInBits();
7916     if (XorC && ShiftC) {
      // Don't crash on an oversized shift. We cannot guarantee that a bogus
7918       // shift has been simplified to undef.
7919       uint64_t ShiftAmt = ShiftC->getLimitedValue();
7920       if (ShiftAmt < BitWidth) {
7921         APInt Ones = APInt::getAllOnes(BitWidth);
7922         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7923         if (XorC->getAPIntValue() == Ones) {
7924           // If the xor constant is a shifted -1, do a 'not' before the shift:
7925           // xor (X << ShiftC), XorC --> (not X) << ShiftC
7926           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7927           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7928           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7929         }
7930       }
7931     }
7932   }
7933 
7934   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
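  // E.g. for i32 X == -5: Y == sra(-5, 31) == -1, the add gives -6, and
  // xor(-6, -1) == 5 == abs(-5). For X >= 0, Y == 0 and both ops are no-ops.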
7935   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7936     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7937     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7938     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7939       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7940       SDValue S0 = S.getOperand(0);
7941       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
7942         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7943           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
7944             return DAG.getNode(ISD::ABS, DL, VT, S0);
7945     }
7946   }
7947 
7948   // fold (xor x, x) -> 0
7949   if (N0 == N1)
7950     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7951 
7952   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7953   // Here is a concrete example of this equivalence:
7954   // i16   x ==  14
7955   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
7956   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7957   //
7958   // =>
7959   //
7960   // i16     ~1      == 0b1111111111111110
7961   // i16 rol(~1, 14) == 0b1011111111111111
7962   //
7963   // Some additional tips to help conceptualize this transform:
7964   // - Try to see the operation as placing a single zero in a value of all ones.
7965   // - There exists no value for x which would allow the result to contain zero.
7966   // - Values of x larger than the bitwidth are undefined and do not require a
7967   //   consistent result.
7968   // - Pushing the zero left requires shifting one-bits in from the right.
7969   // A rotate left of ~1 is a nice way of achieving the desired result.
7970   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7971       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7972     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7973                        N0.getOperand(1));
7974   }
7975 
7976   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
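       // One illustrative case the helper handles (hands with matching second
       // operands): (xor (srl x, c), (srl y, c)) --> (srl (xor x, y), c)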
7977   if (N0Opcode == N1.getOpcode())
7978     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7979       return V;
7980 
7981   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
7982   if (SDValue MM = unfoldMaskedMerge(N))
7983     return MM;
7984 
7985   // Simplify the expression using non-local knowledge.
7986   if (SimplifyDemandedBits(SDValue(N, 0)))
7987     return SDValue(N, 0);
7988 
7989   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
7990     return Combined;
7991 
7992   return SDValue();
7993 }
7994 
7995 /// If we have a shift-by-constant of a bitwise logic op that itself has a
7996 /// shift-by-constant operand with identical opcode, we may be able to convert
7997 /// that into 2 independent shifts followed by the logic op. This is a
7998 /// throughput improvement.
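     /// For example, assuming all shifts are one-use with constant amounts:
     ///   srl (or (srl X, 2), Y), 3 --> or (srl X, 5), (srl Y, 3)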
7999 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
8000   // Match a one-use bitwise logic op.
8001   SDValue LogicOp = Shift->getOperand(0);
8002   if (!LogicOp.hasOneUse())
8003     return SDValue();
8004 
8005   unsigned LogicOpcode = LogicOp.getOpcode();
8006   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
8007       LogicOpcode != ISD::XOR)
8008     return SDValue();
8009 
8010   // Find a matching one-use shift by constant.
8011   unsigned ShiftOpcode = Shift->getOpcode();
8012   SDValue C1 = Shift->getOperand(1);
8013   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
8014   assert(C1Node && "Expected a shift with constant operand");
8015   const APInt &C1Val = C1Node->getAPIntValue();
8016   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
8017                              const APInt *&ShiftAmtVal) {
8018     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8019       return false;
8020 
8021     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8022     if (!ShiftCNode)
8023       return false;
8024 
8025     // Capture the shifted operand and shift amount value.
8026     ShiftOp = V.getOperand(0);
8027     ShiftAmtVal = &ShiftCNode->getAPIntValue();
8028 
8029     // Shift amount types do not have to match their operand type, so check that
8030     // the constants are the same width.
8031     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8032       return false;
8033 
8034     // The fold is not valid if the sum of the shift values meets or exceeds bitwidth.
8035     if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8036       return false;
8037 
8038     return true;
8039   };
8040 
8041   // Logic ops are commutative, so check each operand for a match.
8042   SDValue X, Y;
8043   const APInt *C0Val;
8044   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
8045     Y = LogicOp.getOperand(1);
8046   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
8047     Y = LogicOp.getOperand(0);
8048   else
8049     return SDValue();
8050 
8051   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
8052   SDLoc DL(Shift);
8053   EVT VT = Shift->getValueType(0);
8054   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
8055   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
8056   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
8057   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
8058   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
8059 }
8060 
8061 /// Handle transforms common to the three shifts, when the shift amount is a
8062 /// constant.
8063 /// We are looking for: (shift being one of shl/sra/srl)
8064 ///   shift (binop X, C0), C1
8065 /// And want to transform into:
8066 ///   binop (shift X, C1), (shift C0, C1)
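     /// For example (when the profitability checks below allow it):
     ///   shl (add X, 7), 2 --> add (shl X, 2), 28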
8067 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8068   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8069 
8070   // Do not turn a 'not' into a regular xor.
8071   if (isBitwiseNot(N->getOperand(0)))
8072     return SDValue();
8073 
8074   // The inner binop must be one-use, since we want to replace it.
8075   SDValue LHS = N->getOperand(0);
8076   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8077     return SDValue();
8078 
8079   // TODO: This is limited to early combining because it may reveal regressions
8080   //       otherwise. But since we just checked a target hook to see if this is
8081   //       desirable, that should have filtered out cases where this interferes
8082   //       with some other pattern matching.
8083   if (!LegalTypes)
8084     if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8085       return R;
8086 
8087   // We want to pull some binops through shifts, so that we have (and (shift))
8088   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
8089   // thing happens with address calculations, so it's important to canonicalize
8090   // it.
8091   switch (LHS.getOpcode()) {
8092   default:
8093     return SDValue();
8094   case ISD::OR:
8095   case ISD::XOR:
8096   case ISD::AND:
8097     break;
8098   case ISD::ADD:
8099     if (N->getOpcode() != ISD::SHL)
8100       return SDValue(); // only shl(add) not sr[al](add).
8101     break;
8102   }
8103 
8104   // We require the RHS of the binop to be a constant and not opaque as well.
8105   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8106   if (!BinOpCst)
8107     return SDValue();
8108 
8109   // FIXME: disable this unless the input to the binop is a shift by a constant
8110   // or is a copy/select. Enable this in other cases when we figure out where
8111   // it's actually profitable.
8112   SDValue BinOpLHSVal = LHS.getOperand(0);
8113   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8114                             BinOpLHSVal.getOpcode() == ISD::SRA ||
8115                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
8116                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8117   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8118                         BinOpLHSVal.getOpcode() == ISD::SELECT;
8119 
8120   if (!IsShiftByConstant && !IsCopyOrSelect)
8121     return SDValue();
8122 
8123   if (IsCopyOrSelect && N->hasOneUse())
8124     return SDValue();
8125 
8126   // Fold the constants, shifting the binop RHS by the shift amount.
8127   SDLoc DL(N);
8128   EVT VT = N->getValueType(0);
8129   SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8130                                N->getOperand(1));
8131   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8132 
8133   SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8134                                  N->getOperand(1));
8135   return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8136 }
8137 
8138 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8139   assert(N->getOpcode() == ISD::TRUNCATE);
8140   assert(N->getOperand(0).getOpcode() == ISD::AND);
8141 
8142   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8143   EVT TruncVT = N->getValueType(0);
8144   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8145       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8146     SDValue N01 = N->getOperand(0).getOperand(1);
8147     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8148       SDLoc DL(N);
8149       SDValue N00 = N->getOperand(0).getOperand(0);
8150       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8151       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8152       AddToWorklist(Trunc00.getNode());
8153       AddToWorklist(Trunc01.getNode());
8154       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8155     }
8156   }
8157 
8158   return SDValue();
8159 }
8160 
8161 SDValue DAGCombiner::visitRotate(SDNode *N) {
8162   SDLoc dl(N);
8163   SDValue N0 = N->getOperand(0);
8164   SDValue N1 = N->getOperand(1);
8165   EVT VT = N->getValueType(0);
8166   unsigned Bitsize = VT.getScalarSizeInBits();
8167 
8168   // fold (rot x, 0) -> x
8169   if (isNullOrNullSplat(N1))
8170     return N0;
8171 
8172   // fold (rot x, c) -> x iff (c % BitSize) == 0
8173   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8174     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8175     if (DAG.MaskedValueIsZero(N1, ModuloMask))
8176       return N0;
8177   }
8178 
8179   // fold (rot x, c) -> (rot x, c % BitSize)
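       // e.g. (rotl i8 x, 11) --> (rotl x, 3), since rotating by the bitwidth
       //      is a no-op.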
8180   bool OutOfRange = false;
8181   auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8182     OutOfRange |= C->getAPIntValue().uge(Bitsize);
8183     return true;
8184   };
8185   if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8186     EVT AmtVT = N1.getValueType();
8187     SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8188     if (SDValue Amt =
8189             DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8190       return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8191   }
8192 
8193   // rot i16 X, 8 --> bswap X
8194   auto *RotAmtC = isConstOrConstSplat(N1);
8195   if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8196       VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8197     return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8198 
8199   // Simplify the operands using demanded-bits information.
8200   if (SimplifyDemandedBits(SDValue(N, 0)))
8201     return SDValue(N, 0);
8202 
8203   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8204   if (N1.getOpcode() == ISD::TRUNCATE &&
8205       N1.getOperand(0).getOpcode() == ISD::AND) {
8206     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8207       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8208   }
8209 
8210   unsigned NextOp = N0.getOpcode();
8211   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
8212   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8213     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8214     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8215     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8216       EVT ShiftVT = C1->getValueType(0);
8217       bool SameSide = (N->getOpcode() == NextOp);
8218       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8219       if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8220               CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
8221         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
8222         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8223             ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8224         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8225                            CombinedShiftNorm);
8226       }
8227     }
8228   }
8229   return SDValue();
8230 }
8231 
8232 SDValue DAGCombiner::visitSHL(SDNode *N) {
8233   SDValue N0 = N->getOperand(0);
8234   SDValue N1 = N->getOperand(1);
8235   if (SDValue V = DAG.simplifyShift(N0, N1))
8236     return V;
8237 
8238   EVT VT = N0.getValueType();
8239   EVT ShiftVT = N1.getValueType();
8240   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8241 
8242   // fold vector ops
8243   if (VT.isVector()) {
8244     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8245       return FoldedVOp;
8246 
8247     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8248     // If setcc produces an all-ones true value, then:
8249     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8250     if (N1CV && N1CV->isConstant()) {
8251       if (N0.getOpcode() == ISD::AND) {
8252         SDValue N00 = N0->getOperand(0);
8253         SDValue N01 = N0->getOperand(1);
8254         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8255 
8256         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8257             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8258                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8259           if (SDValue C =
8260                   DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8261             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8262         }
8263       }
8264     }
8265   }
8266 
8267   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8268 
8269   // fold (shl c1, c2) -> c1<<c2
8270   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8271     return C;
8272 
8273   if (SDValue NewSel = foldBinOpIntoSelect(N))
8274     return NewSel;
8275 
8276   // if (shl x, c) is known to be zero, return 0
8277   if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
8278     return DAG.getConstant(0, SDLoc(N), VT);
8279 
8280   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8281   if (N1.getOpcode() == ISD::TRUNCATE &&
8282       N1.getOperand(0).getOpcode() == ISD::AND) {
8283     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8284       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8285   }
8286 
8287   if (SimplifyDemandedBits(SDValue(N, 0)))
8288     return SDValue(N, 0);
8289 
8290   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
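       // e.g. (shl (shl x, 2), 3) --> (shl x, 5), while on i8
       //      (shl (shl x, 4), 4) --> 0 because every bit is shifted out.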
8291   if (N0.getOpcode() == ISD::SHL) {
8292     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8293                                           ConstantSDNode *RHS) {
8294       APInt c1 = LHS->getAPIntValue();
8295       APInt c2 = RHS->getAPIntValue();
8296       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8297       return (c1 + c2).uge(OpSizeInBits);
8298     };
8299     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8300       return DAG.getConstant(0, SDLoc(N), VT);
8301 
8302     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8303                                        ConstantSDNode *RHS) {
8304       APInt c1 = LHS->getAPIntValue();
8305       APInt c2 = RHS->getAPIntValue();
8306       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8307       return (c1 + c2).ult(OpSizeInBits);
8308     };
8309     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8310       SDLoc DL(N);
8311       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8312       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8313     }
8314   }
8315 
8316   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8317   // For this to be valid, the second form must not preserve any of the bits
8318   // that are shifted out by the inner shift in the first form.  This means
8319   // the outer shift size must be >= the number of bits added by the ext.
8320   // As a corollary, we don't care what kind of ext it is.
8321   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8322        N0.getOpcode() == ISD::ANY_EXTEND ||
8323        N0.getOpcode() == ISD::SIGN_EXTEND) &&
8324       N0.getOperand(0).getOpcode() == ISD::SHL) {
8325     SDValue N0Op0 = N0.getOperand(0);
8326     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8327     EVT InnerVT = N0Op0.getValueType();
8328     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8329 
8330     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8331                                                          ConstantSDNode *RHS) {
8332       APInt c1 = LHS->getAPIntValue();
8333       APInt c2 = RHS->getAPIntValue();
8334       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8335       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8336              (c1 + c2).uge(OpSizeInBits);
8337     };
8338     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8339                                   /*AllowUndefs*/ false,
8340                                   /*AllowTypeMismatch*/ true))
8341       return DAG.getConstant(0, SDLoc(N), VT);
8342 
8343     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8344                                                       ConstantSDNode *RHS) {
8345       APInt c1 = LHS->getAPIntValue();
8346       APInt c2 = RHS->getAPIntValue();
8347       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8348       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8349              (c1 + c2).ult(OpSizeInBits);
8350     };
8351     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8352                                   /*AllowUndefs*/ false,
8353                                   /*AllowTypeMismatch*/ true)) {
8354       SDLoc DL(N);
8355       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8356       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8357       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8358       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8359     }
8360   }
8361 
8362   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8363   // Only fold this if the inner zext has no other uses to avoid increasing
8364   // the total number of instructions.
8365   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8366       N0.getOperand(0).getOpcode() == ISD::SRL) {
8367     SDValue N0Op0 = N0.getOperand(0);
8368     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8369 
8370     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8371       APInt c1 = LHS->getAPIntValue();
8372       APInt c2 = RHS->getAPIntValue();
8373       zeroExtendToMatch(c1, c2);
8374       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8375     };
8376     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8377                                   /*AllowUndefs*/ false,
8378                                   /*AllowTypeMismatch*/ true)) {
8379       SDLoc DL(N);
8380       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8381       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8382       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8383       AddToWorklist(NewSHL.getNode());
8384       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8385     }
8386   }
8387 
8388   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
8389   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
8390   // TODO - support non-uniform vector shift amounts.
8391   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
8392       N0->getFlags().hasExact()) {
8393     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8394       uint64_t C1 = N0C1->getZExtValue();
8395       uint64_t C2 = N1C->getZExtValue();
8396       SDLoc DL(N);
8397       if (C1 <= C2)
8398         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8399                            DAG.getConstant(C2 - C1, DL, ShiftVT));
8400       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8401                          DAG.getConstant(C1 - C2, DL, ShiftVT));
8402     }
8403   }
8404 
8405   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
8406   //                               (and (srl x, (sub c1, c2)), MASK)
8407   // Only fold this if the inner shift has no other uses -- if it does, folding
8408   // this will increase the total number of instructions.
8409   // TODO - drop hasOneUse requirement if c1 == c2?
8410   // TODO - support non-uniform vector shift amounts.
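       // A worked example on i8: (shl (srl x, 3), 1) --> (and (srl x, 2), 0x3E),
       // where 0x3E keeps exactly the bits the original shift pair would keep.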
8411   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8412       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8413     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8414       if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8415         uint64_t c1 = N0C1->getZExtValue();
8416         uint64_t c2 = N1C->getZExtValue();
8417         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
8418         SDValue Shift;
8419         if (c2 > c1) {
8420           Mask <<= c2 - c1;
8421           SDLoc DL(N);
8422           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8423                               DAG.getConstant(c2 - c1, DL, ShiftVT));
8424         } else {
8425           Mask.lshrInPlace(c1 - c2);
8426           SDLoc DL(N);
8427           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8428                               DAG.getConstant(c1 - c2, DL, ShiftVT));
8429         }
8430         SDLoc DL(N0);
8431         return DAG.getNode(ISD::AND, DL, VT, Shift,
8432                            DAG.getConstant(Mask, DL, VT));
8433       }
8434     }
8435   }
8436 
8437   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
8438   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8439       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8440     SDLoc DL(N);
8441     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8442     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8443     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8444   }
8445 
8446   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8447   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8448   // Variant of the version done on multiply, except that a mul by a power of 2
8449   // is turned into a shift.
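       // e.g. (shl (add x, 5), 3) --> (add (shl x, 3), 40)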
8450   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8451       N0.getNode()->hasOneUse() &&
8452       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8453       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8454       TLI.isDesirableToCommuteWithShift(N, Level)) {
8455     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8456     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8457     AddToWorklist(Shl0.getNode());
8458     AddToWorklist(Shl1.getNode());
8459     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8460   }
8461 
8462   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
8463   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
8464       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8465       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8466     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8467     if (isConstantOrConstantVector(Shl))
8468       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8469   }
8470 
8471   if (N1C && !N1C->isOpaque())
8472     if (SDValue NewSHL = visitShiftByConstant(N))
8473       return NewSHL;
8474 
8475   // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8476   if (N0.getOpcode() == ISD::VSCALE)
8477     if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8478       const APInt &C0 = N0.getConstantOperandAPInt(0);
8479       const APInt &C1 = NC1->getAPIntValue();
8480       return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8481     }
8482 
8483   // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
8484   APInt ShlVal;
8485   if (N0.getOpcode() == ISD::STEP_VECTOR)
8486     if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
8487       const APInt &C0 = N0.getConstantOperandAPInt(0);
8488       if (ShlVal.ult(C0.getBitWidth())) {
8489         APInt NewStep = C0 << ShlVal;
8490         return DAG.getStepVector(SDLoc(N), VT, NewStep);
8491       }
8492     }
8493 
8494   return SDValue();
8495 }
8496 
8497 // Transform a right shift of a multiply into a multiply-high.
8498 // Examples:
8499 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
8500 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
8501 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
8502                                   const TargetLowering &TLI) {
8503   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8504          "SRL or SRA node is required here!");
8505 
8506   // Check the shift amount. Proceed with the transformation if the shift
8507   // amount is constant.
8508   ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8509   if (!ShiftAmtSrc)
8510     return SDValue();
8511 
8512   SDLoc DL(N);
8513 
8514   // The operation feeding into the shift must be a multiply.
8515   SDValue ShiftOperand = N->getOperand(0);
8516   if (ShiftOperand.getOpcode() != ISD::MUL)
8517     return SDValue();
8518 
8519   // Both operands must be equivalent extend nodes.
8520   SDValue LeftOp = ShiftOperand.getOperand(0);
8521   SDValue RightOp = ShiftOperand.getOperand(1);
8522   bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8523   bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8524 
8525   if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
8526     return SDValue();
8527 
8528   EVT WideVT = LeftOp.getValueType();
8529   // Proceed with the transformation if the wide types match.
8530   assert((WideVT == RightOp.getValueType()) &&
8531          "Cannot have a multiply node with two different operand types.");
8532 
8533   EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8534   // Check that the two extend nodes are the same type.
8535   if (NarrowVT != RightOp.getOperand(0).getValueType())
8536     return SDValue();
8537 
8538   // Proceed with the transformation if the wide type is twice as large
8539   // as the narrow type.
8540   unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8541   if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
8542     return SDValue();
8543 
8544   // Check the shift amount with the narrow type size.
8545   // Proceed with the transformation if the shift amount is the width
8546   // of the narrow type.
8547   unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8548   if (ShiftAmt != NarrowVTSize)
8549     return SDValue();
8550 
8551   // If the operation feeding into the MUL is a sign extend (sext),
8552   // we use mulhs. Otherwise, zero extends (zext) use mulhu.
8553   unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8554 
8555   // Combine to mulh if mulh is legal/custom for the narrow type on the target.
8556   if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
8557     return SDValue();
8558 
8559   SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
8560                                RightOp.getOperand(0));
8561   return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
8562                                      : DAG.getZExtOrTrunc(Result, DL, WideVT));
8563 }
8564 
8565 SDValue DAGCombiner::visitSRA(SDNode *N) {
8566   SDValue N0 = N->getOperand(0);
8567   SDValue N1 = N->getOperand(1);
8568   if (SDValue V = DAG.simplifyShift(N0, N1))
8569     return V;
8570 
8571   EVT VT = N0.getValueType();
8572   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8573 
8574   // Arithmetic shifting an all-sign-bit value is a no-op.
8575   // fold (sra 0, x) -> 0
8576   // fold (sra -1, x) -> -1
8577   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8578     return N0;
8579 
8580   // fold vector ops
8581   if (VT.isVector())
8582     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8583       return FoldedVOp;
8584 
8585   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8586 
8587   // fold (sra c1, c2) -> c1 >>s c2
8588   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8589     return C;
8590 
8591   if (SDValue NewSel = foldBinOpIntoSelect(N))
8592     return NewSel;
8593 
8594   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1, if the target
8595   // supports sext_inreg.
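       // e.g. on i32: (sra (shl x, 24), 24) --> (sext_inreg x, i8)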
8596   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8597     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8598     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8599     if (VT.isVector())
8600       ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
8601                                VT.getVectorElementCount());
8602     if (!LegalOperations ||
8603         TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8604         TargetLowering::Legal)
8605       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8606                          N0.getOperand(0), DAG.getValueType(ExtVT));
8607     // Even if we can't convert to sext_inreg, we might be able to remove
8608     // this shift pair if the input is already sign extended.
8609     if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
8610       return N0.getOperand(0);
8611   }
8612 
8613   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8614   // clamp (add c1, c2) to max shift.
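       // e.g. (sra (sra x, 3), 4) --> (sra x, 7); out-of-range sums clamp, so on
       //      i8, (sra (sra x, 6), 6) --> (sra x, 7).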
8615   if (N0.getOpcode() == ISD::SRA) {
8616     SDLoc DL(N);
8617     EVT ShiftVT = N1.getValueType();
8618     EVT ShiftSVT = ShiftVT.getScalarType();
8619     SmallVector<SDValue, 16> ShiftValues;
8620 
8621     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8622       APInt c1 = LHS->getAPIntValue();
8623       APInt c2 = RHS->getAPIntValue();
8624       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8625       APInt Sum = c1 + c2;
8626       unsigned ShiftSum =
8627           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8628       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8629       return true;
8630     };
8631     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8632       SDValue ShiftValue;
8633       if (N1.getOpcode() == ISD::BUILD_VECTOR)
8634         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8635       else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
8636         assert(ShiftValues.size() == 1 &&
8637                "Expected matchBinaryPredicate to return one element for "
8638                "SPLAT_VECTORs");
8639         ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
8640       } else
8641         ShiftValue = ShiftValues[0];
8642       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8643     }
8644   }
8645 
8646   // fold (sra (shl X, m), (sub result_size, n))
8647   // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
8648   // result_size - n != m.
8649   // If truncate is free for the target, this form is likely to result in
8650   // better code.
8651   if (N0.getOpcode() == ISD::SHL && N1C) {
8652     // Get the two constants of the shifts, CN0 = m, CN = n.
8653     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8654     if (N01C) {
8655       LLVMContext &Ctx = *DAG.getContext();
8656       // Determine what the truncate's result bitsize and type would be.
8657       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8658 
8659       if (VT.isVector())
8660         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8661 
8662       // Determine the residual right-shift amount.
8663       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8664 
8665       // If the shift is not a no-op (in which case this should be just a sign
8666       // extend already), the truncated-to type is legal, sign_extend is legal
8667       // on that type, and the truncate to that type is both legal and free,
8668       // perform the transform.
8669       if ((ShiftAmt > 0) &&
8670           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8671           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8672           TLI.isTruncateFree(VT, TruncVT)) {
8673         SDLoc DL(N);
8674         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8675             getShiftAmountTy(N0.getOperand(0).getValueType()));
8676         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8677                                     N0.getOperand(0), Amt);
8678         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8679                                     Shift);
8680         return DAG.getNode(ISD::SIGN_EXTEND, DL,
8681                            N->getValueType(0), Trunc);
8682       }
8683     }
8684   }
8685 
8686   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8687   //   sra (add (shl X, N1C), AddC), N1C -->
8688   //   sext (add (trunc X to (width - N1C)), AddC')
8689   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8690       N0.getOperand(0).getOpcode() == ISD::SHL &&
8691       N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8692     if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8693       SDValue Shl = N0.getOperand(0);
8694       // Determine what the truncate's type would be and ask the target if that
8695       // is a free operation.
8696       LLVMContext &Ctx = *DAG.getContext();
8697       unsigned ShiftAmt = N1C->getZExtValue();
8698       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8699       if (VT.isVector())
8700         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8701 
8702       // TODO: The simple type check probably belongs in the default hook
8703       //       implementation and/or target-specific overrides (because
8704       //       non-simple types likely require masking when legalized), but that
8705       //       restriction may conflict with other transforms.
8706       if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8707           TLI.isTruncateFree(VT, TruncVT)) {
8708         SDLoc DL(N);
8709         SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8710         SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8711                              trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8712         SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8713         return DAG.getSExtOrTrunc(Add, DL, VT);
8714       }
8715     }
8716   }
8717 
8718   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8719   if (N1.getOpcode() == ISD::TRUNCATE &&
8720       N1.getOperand(0).getOpcode() == ISD::AND) {
8721     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8722       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8723   }
8724 
8725   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8726   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8727   //      if c1 is equal to the number of bits the trunc removes
8728   // TODO - support non-uniform vector shift amounts.
8729   if (N0.getOpcode() == ISD::TRUNCATE &&
8730       (N0.getOperand(0).getOpcode() == ISD::SRL ||
8731        N0.getOperand(0).getOpcode() == ISD::SRA) &&
8732       N0.getOperand(0).hasOneUse() &&
8733       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8734     SDValue N0Op0 = N0.getOperand(0);
8735     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8736       EVT LargeVT = N0Op0.getValueType();
8737       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8738       if (LargeShift->getAPIntValue() == TruncBits) {
8739         SDLoc DL(N);
8740         SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8741                                       getShiftAmountTy(LargeVT));
8742         SDValue SRA =
8743             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8744         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8745       }
8746     }
8747   }
8748 
8749   // Simplify, based on bits shifted out of the LHS.
8750   if (SimplifyDemandedBits(SDValue(N, 0)))
8751     return SDValue(N, 0);
8752 
8753   // If the sign bit is known to be zero, switch this to a SRL.
8754   if (DAG.SignBitIsZero(N0))
8755     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8756 
8757   if (N1C && !N1C->isOpaque())
8758     if (SDValue NewSRA = visitShiftByConstant(N))
8759       return NewSRA;
8760 
8761   // Try to transform this shift into a multiply-high if
8762   // it matches the appropriate pattern detected in combineShiftToMULH.
8763   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8764     return MULH;
8765 
8766   return SDValue();
8767 }
8768 
8769 SDValue DAGCombiner::visitSRL(SDNode *N) {
8770   SDValue N0 = N->getOperand(0);
8771   SDValue N1 = N->getOperand(1);
8772   if (SDValue V = DAG.simplifyShift(N0, N1))
8773     return V;
8774 
8775   EVT VT = N0.getValueType();
8776   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8777 
8778   // fold vector ops
8779   if (VT.isVector())
8780     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8781       return FoldedVOp;
8782 
8783   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8784 
8785   // fold (srl c1, c2) -> c1 >>u c2
8786   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8787     return C;
8788 
8789   if (SDValue NewSel = foldBinOpIntoSelect(N))
8790     return NewSel;
8791 
8792   // if (srl x, c) is known to be zero, return 0
8793   if (N1C &&
8794       DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
8795     return DAG.getConstant(0, SDLoc(N), VT);
8796 
8797   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8798   if (N0.getOpcode() == ISD::SRL) {
8799     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8800                                           ConstantSDNode *RHS) {
8801       APInt c1 = LHS->getAPIntValue();
8802       APInt c2 = RHS->getAPIntValue();
8803       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8804       return (c1 + c2).uge(OpSizeInBits);
8805     };
8806     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8807       return DAG.getConstant(0, SDLoc(N), VT);
8808 
8809     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8810                                        ConstantSDNode *RHS) {
8811       APInt c1 = LHS->getAPIntValue();
8812       APInt c2 = RHS->getAPIntValue();
8813       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8814       return (c1 + c2).ult(OpSizeInBits);
8815     };
8816     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8817       SDLoc DL(N);
8818       EVT ShiftVT = N1.getValueType();
8819       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8820       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
8821     }
8822   }
8823 
8824   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
8825       N0.getOperand(0).getOpcode() == ISD::SRL) {
8826     SDValue InnerShift = N0.getOperand(0);
8827     // TODO - support non-uniform vector shift amounts.
8828     if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
8829       uint64_t c1 = N001C->getZExtValue();
8830       uint64_t c2 = N1C->getZExtValue();
8831       EVT InnerShiftVT = InnerShift.getValueType();
8832       EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
8833       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
8834       // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
8835       // This is only valid if OpSizeInBits + c1 == InnerShiftSize.
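           // e.g. with x: i64 and a 32-bit result:
           //   srl (trunc (srl x, 32)), 8 --> trunc (srl x, 40)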
8836       if (c1 + OpSizeInBits == InnerShiftSize) {
8837         SDLoc DL(N);
8838         if (c1 + c2 >= InnerShiftSize)
8839           return DAG.getConstant(0, DL, VT);
8840         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8841         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8842                                        InnerShift.getOperand(0), NewShiftAmt);
8843         return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
8844       }
8845       // In the more general case, we can clear the high bits after the shift:
8846       // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
8847       if (N0.hasOneUse() && InnerShift.hasOneUse() &&
8848           c1 + c2 < InnerShiftSize) {
8849         SDLoc DL(N);
8850         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8851         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8852                                        InnerShift.getOperand(0), NewShiftAmt);
8853         SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
8854                                                             OpSizeInBits - c2),
8855                                        DL, InnerShiftVT);
8856         SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
8857         return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
8858       }
8859     }
8860   }
8861 
8862   // fold (srl (shl x, c), c) -> (and x, cst2)
8863   // TODO - (srl (shl x, c1), c2).
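       // e.g. on i8: (srl (shl x, 3), 3) --> (and x, 0x1F)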
8864   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8865       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8866     SDLoc DL(N);
8867     SDValue Mask =
8868         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8869     AddToWorklist(Mask.getNode());
8870     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8871   }
8872 
8873   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8874   // TODO - support non-uniform vector shift amounts.
8875   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8876     // Shifting in all undef bits?
8877     EVT SmallVT = N0.getOperand(0).getValueType();
8878     unsigned BitSize = SmallVT.getScalarSizeInBits();
8879     if (N1C->getAPIntValue().uge(BitSize))
8880       return DAG.getUNDEF(VT);
8881 
8882     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8883       uint64_t ShiftAmt = N1C->getZExtValue();
8884       SDLoc DL0(N0);
8885       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8886                                        N0.getOperand(0),
8887                           DAG.getConstant(ShiftAmt, DL0,
8888                                           getShiftAmountTy(SmallVT)));
8889       AddToWorklist(SmallShift.getNode());
8890       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8891       SDLoc DL(N);
8892       return DAG.getNode(ISD::AND, DL, VT,
8893                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8894                          DAG.getConstant(Mask, DL, VT));
8895     }
8896   }
8897 
8898   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
8899   // bit, which is unmodified by sra.
8900   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8901     if (N0.getOpcode() == ISD::SRA)
8902       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8903   }
8904 
8905   // fold (srl (ctlz x), log2(bitwidth)) -> x iff x has one bit set (the low bit).
8906   if (N1C && N0.getOpcode() == ISD::CTLZ &&
8907       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8908     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8909 
8910     // If any of the input bits are KnownOne, then the input couldn't be all
8911     // zeros, thus the result of the srl will always be zero.
8912     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8913 
8914     // If all of the bits input to the ctlz node are known to be zero, then
8915     // the result of the ctlz is "32" and the result of the shift is one.
8916     APInt UnknownBits = ~Known.Zero;
8917     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8918 
8919     // Otherwise, check to see if there is exactly one bit input to the ctlz.
8920     if (UnknownBits.isPowerOf2()) {
8921       // Okay, we know that only the single bit specified by UnknownBits
8922       // could be set on input to the CTLZ node. If this bit is set, the SRL
8923       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
8924       // to an SRL/XOR pair, which is likely to simplify more.
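           // e.g. for i32, if only bit 3 of the input can be set, then
           //   (srl (ctlz x), 5) == (xor (srl x, 3), 1).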
8925       unsigned ShAmt = UnknownBits.countTrailingZeros();
8926       SDValue Op = N0.getOperand(0);
8927 
8928       if (ShAmt) {
8929         SDLoc DL(N0);
8930         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8931                   DAG.getConstant(ShAmt, DL,
8932                                   getShiftAmountTy(Op.getValueType())));
8933         AddToWorklist(Op.getNode());
8934       }
8935 
8936       SDLoc DL(N);
8937       return DAG.getNode(ISD::XOR, DL, VT,
8938                          Op, DAG.getConstant(1, DL, VT));
8939     }
8940   }
8941 
8942   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8943   if (N1.getOpcode() == ISD::TRUNCATE &&
8944       N1.getOperand(0).getOpcode() == ISD::AND) {
8945     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8946       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8947   }
8948 
8949   // fold operands of srl based on knowledge that the low bits are not
8950   // demanded.
8951   if (SimplifyDemandedBits(SDValue(N, 0)))
8952     return SDValue(N, 0);
8953 
8954   if (N1C && !N1C->isOpaque())
8955     if (SDValue NewSRL = visitShiftByConstant(N))
8956       return NewSRL;
8957 
8958   // Attempt to convert a srl of a load into a narrower zero-extending load.
8959   if (SDValue NarrowLoad = ReduceLoadWidth(N))
8960     return NarrowLoad;
8961 
8962   // Here is a common situation. We want to optimize:
8963   //
8964   //   %a = ...
8965   //   %b = and i32 %a, 2
8966   //   %c = srl i32 %b, 1
8967   //   brcond i32 %c ...
8968   //
8969   // into
8970   //
8971   //   %a = ...
8972   //   %b = and %a, 2
8973   //   %c = setcc eq %b, 0
8974   //   brcond %c ...
8975   //
8976   // However, after the source operand of the SRL is optimized into an AND, the
8977   // SRL itself may not be optimized further. Look for the BRCOND use and add it
8978   // to the worklist.
8979   if (N->hasOneUse()) {
8980     SDNode *Use = *N->use_begin();
8981     if (Use->getOpcode() == ISD::BRCOND)
8982       AddToWorklist(Use);
8983     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
8984       // Also look past the truncate.
8985       Use = *Use->use_begin();
8986       if (Use->getOpcode() == ISD::BRCOND)
8987         AddToWorklist(Use);
8988     }
8989   }
8990 
8991   // Try to transform this shift into a multiply-high if
8992   // it matches the appropriate pattern detected in combineShiftToMULH.
8993   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8994     return MULH;
8995 
8996   return SDValue();
8997 }
8998 
8999 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
9000   EVT VT = N->getValueType(0);
9001   SDValue N0 = N->getOperand(0);
9002   SDValue N1 = N->getOperand(1);
9003   SDValue N2 = N->getOperand(2);
9004   bool IsFSHL = N->getOpcode() == ISD::FSHL;
9005   unsigned BitWidth = VT.getScalarSizeInBits();
9006 
9007   // fold (fshl N0, N1, 0) -> N0
9008   // fold (fshr N0, N1, 0) -> N1
9009   if (isPowerOf2_32(BitWidth))
9010     if (DAG.MaskedValueIsZero(
9011             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
9012       return IsFSHL ? N0 : N1;
9013 
9014   auto IsUndefOrZero = [](SDValue V) {
9015     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
9016   };
9017 
9018   // TODO - support non-uniform vector shift amounts.
9019   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
9020     EVT ShAmtTy = N2.getValueType();
9021 
9022     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
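         // e.g. on i8: (fshl N0, N1, 11) --> (fshl N0, N1, 3)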
9023     if (Cst->getAPIntValue().uge(BitWidth)) {
9024       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
9025       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
9026                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
9027     }
9028 
9029     unsigned ShAmt = Cst->getZExtValue();
9030     if (ShAmt == 0)
9031       return IsFSHL ? N0 : N1;
9032 
9033     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
9034     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
9035     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
9036     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
9037     if (IsUndefOrZero(N0))
9038       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
9039                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
9040                                          SDLoc(N), ShAmtTy));
9041     if (IsUndefOrZero(N1))
9042       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
9043                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
9044                                          SDLoc(N), ShAmtTy));
9045 
9046     // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9047     // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9048     // TODO - bigendian support once we have test coverage.
9049     // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
9050     // TODO - permit LHS EXTLOAD if extensions are shifted out.
9051     if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
9052         !DAG.getDataLayout().isBigEndian()) {
9053       auto *LHS = dyn_cast<LoadSDNode>(N0);
9054       auto *RHS = dyn_cast<LoadSDNode>(N1);
9055       if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
9056           LHS->getAddressSpace() == RHS->getAddressSpace() &&
9057           (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
9058           ISD::isNON_EXTLoad(LHS)) {
9059         if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
9060           SDLoc DL(RHS);
9061           uint64_t PtrOff =
9062               IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
9063           Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
9064           bool Fast = false;
9065           if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9066                                      RHS->getAddressSpace(), NewAlign,
9067                                      RHS->getMemOperand()->getFlags(), &Fast) &&
9068               Fast) {
9069             SDValue NewPtr = DAG.getMemBasePlusOffset(
9070                 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9071             AddToWorklist(NewPtr.getNode());
9072             SDValue Load = DAG.getLoad(
9073                 VT, DL, RHS->getChain(), NewPtr,
9074                 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9075                 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9076             // Replace the old load's chain with the new load's chain.
9077             WorklistRemover DeadNodes(*this);
9078             DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9079             return Load;
9080           }
9081         }
9082       }
9083     }
9084   }
9085 
9086   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9087   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
9088   // iff we know the shift amount is in range.
9089   // TODO: when is it worth doing SUB(BW, N2) as well?
9090   if (isPowerOf2_32(BitWidth)) {
9091     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
9092     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9093       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9094     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9095       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9096   }
9097 
9098   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9099   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
9100   // TODO: Investigate flipping this rotate if only one is legal; if the funnel
9101   // shift is legal as well, we might be better off avoiding non-constant (BW - N2).
9102   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9103   if (N0 == N1 && hasOperation(RotOpc, VT))
9104     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9105 
9106   // Simplify, based on bits shifted out of N0/N1.
9107   if (SimplifyDemandedBits(SDValue(N, 0)))
9108     return SDValue(N, 0);
9109 
9110   return SDValue();
9111 }
9112 
9113 // Given an ABS node, detect the following pattern:
9114 // (ABS (SUB (EXTEND a), (EXTEND b))).
9115 // Generates a UABD/SABD instruction.
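     // e.g. (abs (sub (zext a), (zext b))) --> (zext (abdu a, b))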
9116 static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
9117                                const TargetLowering &TLI) {
9118   SDValue AbsOp1 = N->getOperand(0);
9119   SDValue Op0, Op1;
9120 
9121   if (AbsOp1.getOpcode() != ISD::SUB)
9122     return SDValue();
9123 
9124   Op0 = AbsOp1.getOperand(0);
9125   Op1 = AbsOp1.getOperand(1);
9126 
9127   unsigned Opc0 = Op0.getOpcode();
9128   // Check if the operands of the sub are (zero|sign)-extended.
9129   if (Opc0 != Op1.getOpcode() ||
9130       (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
9131     return SDValue();
9132 
9133   EVT VT1 = Op0.getOperand(0).getValueType();
9134   EVT VT2 = Op1.getOperand(0).getValueType();
9135   // Check if the operands are of the same type and valid size.
9136   unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
9137   if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
9138     return SDValue();
9139 
9140   Op0 = Op0.getOperand(0);
9141   Op1 = Op1.getOperand(0);
9142   SDValue ABD =
9143       DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
9144   return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
9145 }
9146 
9147 SDValue DAGCombiner::visitABS(SDNode *N) {
9148   SDValue N0 = N->getOperand(0);
9149   EVT VT = N->getValueType(0);
9150 
9151   // fold (abs c1) -> c2
9152   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9153     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9154   // fold (abs (abs x)) -> (abs x)
9155   if (N0.getOpcode() == ISD::ABS)
9156     return N0;
9157   // fold (abs x) -> x iff not-negative
9158   if (DAG.SignBitIsZero(N0))
9159     return N0;
9160 
9161   if (SDValue ABD = combineABSToABD(N, DAG, TLI))
9162     return ABD;
9163 
9164   return SDValue();
9165 }
9166 
9167 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9168   SDValue N0 = N->getOperand(0);
9169   EVT VT = N->getValueType(0);
9170 
9171   // fold (bswap c1) -> c2
9172   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9173     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
9174   // fold (bswap (bswap x)) -> x
9175   if (N0.getOpcode() == ISD::BSWAP)
9176     return N0->getOperand(0);
9177   return SDValue();
9178 }
9179 
9180 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9181   SDValue N0 = N->getOperand(0);
9182   EVT VT = N->getValueType(0);
9183 
9184   // fold (bitreverse c1) -> c2
9185   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9186     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9187   // fold (bitreverse (bitreverse x)) -> x
9188   if (N0.getOpcode() == ISD::BITREVERSE)
9189     return N0.getOperand(0);
9190   return SDValue();
9191 }
9192 
9193 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9194   SDValue N0 = N->getOperand(0);
9195   EVT VT = N->getValueType(0);
9196 
9197   // fold (ctlz c1) -> c2
9198   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9199     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9200 
9201   // If the value is known never to be zero, switch to the undef version.
9202   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9203     if (DAG.isKnownNeverZero(N0))
9204       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9205   }
9206 
9207   return SDValue();
9208 }
9209 
9210 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9211   SDValue N0 = N->getOperand(0);
9212   EVT VT = N->getValueType(0);
9213 
9214   // fold (ctlz_zero_undef c1) -> c2
9215   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9216     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9217   return SDValue();
9218 }
9219 
9220 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9221   SDValue N0 = N->getOperand(0);
9222   EVT VT = N->getValueType(0);
9223 
9224   // fold (cttz c1) -> c2
9225   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9226     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9227 
9228   // If the value is known never to be zero, switch to the undef version.
9229   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9230     if (DAG.isKnownNeverZero(N0))
9231       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9232   }
9233 
9234   return SDValue();
9235 }
9236 
9237 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9238   SDValue N0 = N->getOperand(0);
9239   EVT VT = N->getValueType(0);
9240 
9241   // fold (cttz_zero_undef c1) -> c2
9242   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9243     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9244   return SDValue();
9245 }
9246 
9247 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9248   SDValue N0 = N->getOperand(0);
9249   EVT VT = N->getValueType(0);
9250 
9251   // fold (ctpop c1) -> c2
9252   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9253     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9254   return SDValue();
9255 }
9256 
9257 // FIXME: This should be checking for no signed zeros on individual operands, as
9258 // well as no nans.
9259 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9260                                          SDValue RHS,
9261                                          const TargetLowering &TLI) {
9262   const TargetOptions &Options = DAG.getTarget().Options;
9263   EVT VT = LHS.getValueType();
9264 
9265   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9266          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9267          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9268 }
9269 
9270 /// Generate Min/Max node
9271 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9272                                    SDValue RHS, SDValue True, SDValue False,
9273                                    ISD::CondCode CC, const TargetLowering &TLI,
9274                                    SelectionDAG &DAG) {
9275   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9276     return SDValue();
9277 
9278   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9279   switch (CC) {
9280   case ISD::SETOLT:
9281   case ISD::SETOLE:
9282   case ISD::SETLT:
9283   case ISD::SETLE:
9284   case ISD::SETULT:
9285   case ISD::SETULE: {
    // We only get here if the operands are known never to be NaN, so either
    // fminnum or fminnum_ieee is OK. Try the IEEE version first, since fminnum
    // is expanded in terms of it.
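    // For example (illustrative): (select (setolt LHS, RHS), LHS, RHS)
    //   --> (fminnum_ieee LHS, RHS)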
9289     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9290     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9291       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9292 
9293     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9294     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9295       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9296     return SDValue();
9297   }
9298   case ISD::SETOGT:
9299   case ISD::SETOGE:
9300   case ISD::SETGT:
9301   case ISD::SETGE:
9302   case ISD::SETUGT:
9303   case ISD::SETUGE: {
9304     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9305     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9306       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9307 
9308     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9309     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9310       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9311     return SDValue();
9312   }
9313   default:
9314     return SDValue();
9315   }
9316 }
9317 
9318 /// If a (v)select has a condition value that is a sign-bit test, try to smear
9319 /// the condition operand sign-bit across the value width and use it as a mask.
9320 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
9321   SDValue Cond = N->getOperand(0);
9322   SDValue C1 = N->getOperand(1);
9323   SDValue C2 = N->getOperand(2);
9324   if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
9325     return SDValue();
9326 
9327   EVT VT = N->getValueType(0);
9328   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9329       VT != Cond.getOperand(0).getValueType())
9330     return SDValue();
9331 
9332   // The inverted-condition + commuted-select variants of these patterns are
9333   // canonicalized to these forms in IR.
9334   SDValue X = Cond.getOperand(0);
9335   SDValue CondC = Cond.getOperand(1);
9336   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9337   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9338       isAllOnesOrAllOnesSplat(C2)) {
9339     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9340     SDLoc DL(N);
9341     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9342     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9343     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9344   }
9345   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9346     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9347     SDLoc DL(N);
9348     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9349     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9350     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9351   }
9352   return SDValue();
9353 }
9354 
9355 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9356   SDValue Cond = N->getOperand(0);
9357   SDValue N1 = N->getOperand(1);
9358   SDValue N2 = N->getOperand(2);
9359   EVT VT = N->getValueType(0);
9360   EVT CondVT = Cond.getValueType();
9361   SDLoc DL(N);
9362 
9363   if (!VT.isInteger())
9364     return SDValue();
9365 
9366   auto *C1 = dyn_cast<ConstantSDNode>(N1);
9367   auto *C2 = dyn_cast<ConstantSDNode>(N2);
9368   if (!C1 || !C2)
9369     return SDValue();
9370 
9371   // Only do this before legalization to avoid conflicting with target-specific
9372   // transforms in the other direction (create a select from a zext/sext). There
9373   // is also a target-independent combine here in DAGCombiner in the other
9374   // direction for (select Cond, -1, 0) when the condition is not i1.
9375   if (CondVT == MVT::i1 && !LegalOperations) {
9376     if (C1->isZero() && C2->isOne()) {
9377       // select Cond, 0, 1 --> zext (!Cond)
9378       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9379       if (VT != MVT::i1)
9380         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
9381       return NotCond;
9382     }
9383     if (C1->isZero() && C2->isAllOnes()) {
9384       // select Cond, 0, -1 --> sext (!Cond)
9385       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9386       if (VT != MVT::i1)
9387         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
9388       return NotCond;
9389     }
9390     if (C1->isOne() && C2->isZero()) {
9391       // select Cond, 1, 0 --> zext (Cond)
9392       if (VT != MVT::i1)
9393         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9394       return Cond;
9395     }
9396     if (C1->isAllOnes() && C2->isZero()) {
9397       // select Cond, -1, 0 --> sext (Cond)
9398       if (VT != MVT::i1)
9399         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9400       return Cond;
9401     }
9402 
9403     // Use a target hook because some targets may prefer to transform in the
9404     // other direction.
9405     if (TLI.convertSelectOfConstantsToMath(VT)) {
9406       // For any constants that differ by 1, we can transform the select into an
9407       // extend and add.
9408       const APInt &C1Val = C1->getAPIntValue();
9409       const APInt &C2Val = C2->getAPIntValue();
9410       if (C1Val - 1 == C2Val) {
9411         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
9412         if (VT != MVT::i1)
9413           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9414         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9415       }
9416       if (C1Val + 1 == C2Val) {
9417         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
9418         if (VT != MVT::i1)
9419           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9420         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9421       }
9422 
9423       // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
9424       if (C1Val.isPowerOf2() && C2Val.isZero()) {
9425         if (VT != MVT::i1)
9426           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9427         SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
9428         return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
9429       }
9430 
9431       if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9432         return V;
9433     }
9434 
9435     return SDValue();
9436   }
9437 
9438   // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer-based booleans have different contents
  // than floating-point-based booleans. This is because we can't tell whether
  // we have an integer-based boolean or a floating-point-based boolean unless
  // we can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if the condition is a SETCC node, but it can potentially be
9444   // undiscoverable (or not reasonably discoverable). For example, it could be
9445   // in another basic block or it could require searching a complicated
9446   // expression.
9447   if (CondVT.isInteger() &&
9448       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
9449           TargetLowering::ZeroOrOneBooleanContent &&
9450       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
9451           TargetLowering::ZeroOrOneBooleanContent &&
9452       C1->isZero() && C2->isOne()) {
9453     SDValue NotCond =
9454         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
9455     if (VT.bitsEq(CondVT))
9456       return NotCond;
9457     return DAG.getZExtOrTrunc(NotCond, DL, VT);
9458   }
9459 
9460   return SDValue();
9461 }
9462 
9463 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
9464   assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
9465          "Expected a (v)select");
9466   SDValue Cond = N->getOperand(0);
9467   SDValue T = N->getOperand(1), F = N->getOperand(2);
9468   EVT VT = N->getValueType(0);
9469   if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
9470     return SDValue();
9471 
9472   // select Cond, Cond, F --> or Cond, F
9473   // select Cond, 1, F    --> or Cond, F
9474   if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
9475     return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
9476 
9477   // select Cond, T, Cond --> and Cond, T
9478   // select Cond, T, 0    --> and Cond, T
9479   if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
9480     return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
9481 
9482   // select Cond, T, 1 --> or (not Cond), T
9483   if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
9484     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9485     return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
9486   }
9487 
9488   // select Cond, 0, F --> and (not Cond), F
9489   if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
9490     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9491     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
9492   }
9493 
9494   return SDValue();
9495 }
9496 
9497 SDValue DAGCombiner::visitSELECT(SDNode *N) {
9498   SDValue N0 = N->getOperand(0);
9499   SDValue N1 = N->getOperand(1);
9500   SDValue N2 = N->getOperand(2);
9501   EVT VT = N->getValueType(0);
9502   EVT VT0 = N0.getValueType();
9503   SDLoc DL(N);
9504   SDNodeFlags Flags = N->getFlags();
9505 
9506   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9507     return V;
9508 
9509   if (SDValue V = foldSelectOfConstants(N))
9510     return V;
9511 
9512   if (SDValue V = foldBoolSelectToLogic(N, DAG))
9513     return V;
9514 
9515   // If we can fold this based on the true/false value, do so.
9516   if (SimplifySelectOps(N, N1, N2))
9517     return SDValue(N, 0); // Don't revisit N.
9518 
9519   if (VT0 == MVT::i1) {
    // The code in this block deals with the following two equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However, we always transform
    // to the right-hand form if the inner select already exists in the DAG,
    // and we always transform to the left-hand form if we know that we can
    // further optimize the combination of the conditions.
9528     bool normalizeToSequence =
9529         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
9530     // select (and Cond0, Cond1), X, Y
9531     //   -> select Cond0, (select Cond1, X, Y), Y
9532     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
9533       SDValue Cond0 = N0->getOperand(0);
9534       SDValue Cond1 = N0->getOperand(1);
9535       SDValue InnerSelect =
9536           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
9537       if (normalizeToSequence || !InnerSelect.use_empty())
9538         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
9539                            InnerSelect, N2, Flags);
9540       // Cleanup on failure.
9541       if (InnerSelect.use_empty())
9542         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9543     }
9544     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
9545     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
9546       SDValue Cond0 = N0->getOperand(0);
9547       SDValue Cond1 = N0->getOperand(1);
9548       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
9549                                         Cond1, N1, N2, Flags);
9550       if (normalizeToSequence || !InnerSelect.use_empty())
9551         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
9552                            InnerSelect, Flags);
9553       // Cleanup on failure.
9554       if (InnerSelect.use_empty())
9555         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9556     }
9557 
9558     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
9559     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
9560       SDValue N1_0 = N1->getOperand(0);
9561       SDValue N1_1 = N1->getOperand(1);
9562       SDValue N1_2 = N1->getOperand(2);
9563       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9564         // Create the actual and node if we can generate good code for it.
9565         if (!normalizeToSequence) {
9566           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9567           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9568                              N2, Flags);
9569         }
9570         // Otherwise see if we can optimize the "and" to a better pattern.
9571         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9572           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9573                              N2, Flags);
9574         }
9575       }
9576     }
9577     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9578     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9579       SDValue N2_0 = N2->getOperand(0);
9580       SDValue N2_1 = N2->getOperand(1);
9581       SDValue N2_2 = N2->getOperand(2);
9582       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9583         // Create the actual or node if we can generate good code for it.
9584         if (!normalizeToSequence) {
9585           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9586           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9587                              N2_2, Flags);
9588         }
9589         // Otherwise see if we can optimize to a better pattern.
9590         if (SDValue Combined = visitORLike(N0, N2_0, N))
9591           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9592                              N2_2, Flags);
9593       }
9594     }
9595   }
9596 
9597   // select (not Cond), N1, N2 -> select Cond, N2, N1
9598   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9599     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9600     SelectOp->setFlags(Flags);
9601     return SelectOp;
9602   }
9603 
9604   // Fold selects based on a setcc into other things, such as min/max/abs.
9605   if (N0.getOpcode() == ISD::SETCC) {
9606     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9607     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9608 
9609     // select (fcmp lt x, y), x, y -> fminnum x, y
9610     // select (fcmp gt x, y), x, y -> fmaxnum x, y
9611     //
9612     // This is OK if we don't care what happens if either operand is a NaN.
9613     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9614       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9615                                                 CC, TLI, DAG))
9616         return FMinMax;
9617 
9618     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9619     // This is conservatively limited to pre-legal-operations to give targets
9620     // a chance to reverse the transform if they want to do that. Also, it is
9621     // unlikely that the pattern would be formed late, so it's probably not
9622     // worth going through the other checks.
9623     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9624         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9625         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
9626       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
9627       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
9628       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
9629         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
9630         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
9631         //
9632         // The IR equivalent of this transform would have this form:
9633         //   %a = add %x, C
9634         //   %c = icmp ugt %x, ~C
9635         //   %r = select %c, -1, %a
9636         //   =>
9637         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
9638         //   %u0 = extractvalue %u, 0
9639         //   %u1 = extractvalue %u, 1
9640         //   %r = select %u1, -1, %u0
9641         SDVTList VTs = DAG.getVTList(VT, VT0);
9642         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
9643         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
9644       }
9645     }
9646 
9647     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
9648         (!LegalOperations &&
9649          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
      // Any flags available in a select/setcc fold will be on the setcc as
      // they migrated from fcmp.
9652       Flags = N0.getNode()->getFlags();
9653       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
9654                                        N2, N0.getOperand(2));
9655       SelectNode->setFlags(Flags);
9656       return SelectNode;
9657     }
9658 
9659     if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
9660       return NewSel;
9661   }
9662 
9663   if (!VT.isVector())
9664     if (SDValue BinOp = foldSelectOfBinops(N))
9665       return BinOp;
9666 
9667   return SDValue();
9668 }
9669 
9670 // This function assumes all the vselect's arguments are CONCAT_VECTOR
9671 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
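// For example (illustrative, with a 4-element condition):
//   (vselect <0,0,-1,-1>, (concat_vectors A, B), (concat_vectors C, D))
//     --> (concat_vectors C, B)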
9672 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9673   SDLoc DL(N);
9674   SDValue Cond = N->getOperand(0);
9675   SDValue LHS = N->getOperand(1);
9676   SDValue RHS = N->getOperand(2);
9677   EVT VT = N->getValueType(0);
9678   int NumElems = VT.getVectorNumElements();
9679   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
9680          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
9681          Cond.getOpcode() == ISD::BUILD_VECTOR);
9682 
  // CONCAT_VECTORS can take an arbitrary number of operands. We only care
  // about binary ones here.
9685   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
9686     return SDValue();
9687 
9688   // We're sure we have an even number of elements due to the
9689   // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF.
  // After we find a non-UNDEF element, keep looping until we get to half the
  // length of the BV and check that all the non-undef elements are the same.
9693   ConstantSDNode *BottomHalf = nullptr;
9694   for (int i = 0; i < NumElems / 2; ++i) {
9695     if (Cond->getOperand(i)->isUndef())
9696       continue;
9697 
9698     if (BottomHalf == nullptr)
9699       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9700     else if (Cond->getOperand(i).getNode() != BottomHalf)
9701       return SDValue();
9702   }
9703 
9704   // Do the same for the second half of the BuildVector
9705   ConstantSDNode *TopHalf = nullptr;
9706   for (int i = NumElems / 2; i < NumElems; ++i) {
9707     if (Cond->getOperand(i)->isUndef())
9708       continue;
9709 
9710     if (TopHalf == nullptr)
9711       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9712     else if (Cond->getOperand(i).getNode() != TopHalf)
9713       return SDValue();
9714   }
9715 
9716   assert(TopHalf && BottomHalf &&
9717          "One half of the selector was all UNDEFs and the other was all the "
9718          "same value. This should have been addressed before this function.");
9719   return DAG.getNode(
9720       ISD::CONCAT_VECTORS, DL, VT,
9721       BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
9722       TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
9723 }
9724 
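// If the base pointer is null and the index is (add (splat X), Y), we can use
// X as the uniform base and Y as the index. For example (illustrative):
//   base = 0, index = (add (splat %ptr), %offsets)
//     --> base = %ptr, index = %offsets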
9725 bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
9726   if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
9727     return false;
9728 
9729   // For now we check only the LHS of the add.
9730   SDValue LHS = Index.getOperand(0);
9731   SDValue SplatVal = DAG.getSplatValue(LHS);
9732   if (!SplatVal)
9733     return false;
9734 
9735   BasePtr = SplatVal;
9736   Index = Index.getOperand(1);
9737   return true;
9738 }
9739 
9740 // Fold sext/zext of index into index type.
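// For example (illustrative): if the index is (zext %idx) and the target
// reports the extend as removable for %idx's type, use %idx directly and mark
// the index type as unsigned.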
9741 bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
9742                      bool Scaled, SelectionDAG &DAG) {
9743   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9744 
9745   if (Index.getOpcode() == ISD::ZERO_EXTEND) {
9746     SDValue Op = Index.getOperand(0);
9747     MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
9748     if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9749       Index = Op;
9750       return true;
9751     }
9752   }
9753 
9754   if (Index.getOpcode() == ISD::SIGN_EXTEND) {
9755     SDValue Op = Index.getOperand(0);
9756     MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
9757     if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9758       Index = Op;
9759       return true;
9760     }
9761   }
9762 
9763   return false;
9764 }
9765 
9766 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
9767   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
9768   SDValue Mask = MSC->getMask();
9769   SDValue Chain = MSC->getChain();
9770   SDValue Index = MSC->getIndex();
9771   SDValue Scale = MSC->getScale();
9772   SDValue StoreVal = MSC->getValue();
9773   SDValue BasePtr = MSC->getBasePtr();
9774   SDLoc DL(N);
9775 
9776   // Zap scatters with a zero mask.
9777   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9778     return Chain;
9779 
9780   if (refineUniformBase(BasePtr, Index, DAG)) {
9781     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9782     return DAG.getMaskedScatter(
9783         DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
9784         MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9785   }
9786 
9787   if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
9788     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9789     return DAG.getMaskedScatter(
9790         DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
9791         MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9792   }
9793 
9794   return SDValue();
9795 }
9796 
9797 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
9798   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
9799   SDValue Mask = MST->getMask();
9800   SDValue Chain = MST->getChain();
9801   SDLoc DL(N);
9802 
9803   // Zap masked stores with a zero mask.
9804   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9805     return Chain;
9806 
  // If this is a masked store with an all-ones mask, we can use an unmasked
  // store.
9808   // FIXME: Can we do this for indexed, compressing, or truncating stores?
9809   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
9810       MST->isUnindexed() && !MST->isCompressingStore() &&
9811       !MST->isTruncatingStore())
9812     return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
9813                         MST->getBasePtr(), MST->getMemOperand());
9814 
9815   // Try transforming N to an indexed store.
9816   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9817     return SDValue(N, 0);
9818 
9819   return SDValue();
9820 }
9821 
9822 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
9823   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
9824   SDValue Mask = MGT->getMask();
9825   SDValue Chain = MGT->getChain();
9826   SDValue Index = MGT->getIndex();
9827   SDValue Scale = MGT->getScale();
9828   SDValue PassThru = MGT->getPassThru();
9829   SDValue BasePtr = MGT->getBasePtr();
9830   SDLoc DL(N);
9831 
9832   // Zap gathers with a zero mask.
9833   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9834     return CombineTo(N, PassThru, MGT->getChain());
9835 
9836   if (refineUniformBase(BasePtr, Index, DAG)) {
9837     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9838     return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9839                                MGT->getMemoryVT(), DL, Ops,
9840                                MGT->getMemOperand(), MGT->getIndexType(),
9841                                MGT->getExtensionType());
9842   }
9843 
9844   if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
9845     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9846     return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9847                                MGT->getMemoryVT(), DL, Ops,
9848                                MGT->getMemOperand(), MGT->getIndexType(),
9849                                MGT->getExtensionType());
9850   }
9851 
9852   return SDValue();
9853 }
9854 
9855 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
9856   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
9857   SDValue Mask = MLD->getMask();
9858   SDLoc DL(N);
9859 
9860   // Zap masked loads with a zero mask.
9861   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9862     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
9863 
  // If this is a masked load with an all-ones mask, we can use an unmasked
  // load.
9865   // FIXME: Can we do this for indexed, expanding, or extending loads?
9866   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
9867       MLD->isUnindexed() && !MLD->isExpandingLoad() &&
9868       MLD->getExtensionType() == ISD::NON_EXTLOAD) {
9869     SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
9870                                 MLD->getBasePtr(), MLD->getMemOperand());
9871     return CombineTo(N, NewLd, NewLd.getValue(1));
9872   }
9873 
9874   // Try transforming N to an indexed load.
9875   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9876     return SDValue(N, 0);
9877 
9878   return SDValue();
9879 }
9880 
9881 /// A vector select of 2 constant vectors can be simplified to math/logic to
9882 /// avoid a variable select instruction and possibly avoid constant loads.
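/// For example (illustrative):
///   (vselect Cond, <5,5,5,5>, <4,4,4,4>) --> (add (zext Cond), <4,4,4,4>)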
9883 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
9884   SDValue Cond = N->getOperand(0);
9885   SDValue N1 = N->getOperand(1);
9886   SDValue N2 = N->getOperand(2);
9887   EVT VT = N->getValueType(0);
9888   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
9889       !TLI.convertSelectOfConstantsToMath(VT) ||
9890       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
9891       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
9892     return SDValue();
9893 
9894   // Check if we can use the condition value to increment/decrement a single
9895   // constant value. This simplifies a select to an add and removes a constant
9896   // load/materialization from the general case.
9897   bool AllAddOne = true;
9898   bool AllSubOne = true;
9899   unsigned Elts = VT.getVectorNumElements();
9900   for (unsigned i = 0; i != Elts; ++i) {
9901     SDValue N1Elt = N1.getOperand(i);
9902     SDValue N2Elt = N2.getOperand(i);
9903     if (N1Elt.isUndef() || N2Elt.isUndef())
9904       continue;
9905     if (N1Elt.getValueType() != N2Elt.getValueType())
9906       continue;
9907 
9908     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
9909     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
9910     if (C1 != C2 + 1)
9911       AllAddOne = false;
9912     if (C1 != C2 - 1)
9913       AllSubOne = false;
9914   }
9915 
9916   // Further simplifications for the extra-special cases where the constants are
9917   // all 0 or all -1 should be implemented as folds of these patterns.
9918   SDLoc DL(N);
9919   if (AllAddOne || AllSubOne) {
9920     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
9921     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
9922     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
9923     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
9924     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
9925   }
9926 
9927   // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
9928   APInt Pow2C;
9929   if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
9930       isNullOrNullSplat(N2)) {
9931     SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
9932     SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
9933     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
9934   }
9935 
9936   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9937     return V;
9938 
9939   // The general case for select-of-constants:
9940   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
9941   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
9942   // leave that to a machine-specific pass.
9943   return SDValue();
9944 }
9945 
9946 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
9947   SDValue N0 = N->getOperand(0);
9948   SDValue N1 = N->getOperand(1);
9949   SDValue N2 = N->getOperand(2);
9950   EVT VT = N->getValueType(0);
9951   SDLoc DL(N);
9952 
9953   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9954     return V;
9955 
9956   if (SDValue V = foldBoolSelectToLogic(N, DAG))
9957     return V;
9958 
9959   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
9960   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
9961     return DAG.getSelect(DL, VT, F, N2, N1);
9962 
9963   // Canonicalize integer abs.
9964   // vselect (setg[te] X,  0),  X, -X ->
9965   // vselect (setgt    X, -1),  X, -X ->
9966   // vselect (setl[te] X,  0), -X,  X ->
9967   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
9968   if (N0.getOpcode() == ISD::SETCC) {
9969     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
9970     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9971     bool isAbs = false;
9972     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
9973 
9974     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
9975          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
9976         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
9977       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
9978     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
9979              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
9980       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
9981 
9982     if (isAbs) {
9983       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
9984         return DAG.getNode(ISD::ABS, DL, VT, LHS);
9985 
9986       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
9987                                   DAG.getConstant(VT.getScalarSizeInBits() - 1,
9988                                                   DL, getShiftAmountTy(VT)));
9989       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
9990       AddToWorklist(Shift.getNode());
9991       AddToWorklist(Add.getNode());
9992       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
9993     }
9994 
9995     // vselect x, y (fcmp lt x, y) -> fminnum x, y
9996     // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
9997     //
9998     // This is OK if we don't care about what happens if either operand is a
9999     // NaN.
10000     //
10001     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
10002       if (SDValue FMinMax =
10003               combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
10004         return FMinMax;
10005     }
10006 
10007     // If this select has a condition (setcc) with narrower operands than the
10008     // select, try to widen the compare to match the select width.
10009     // TODO: This should be extended to handle any constant.
10010     // TODO: This could be extended to handle non-loading patterns, but that
10011     //       requires thorough testing to avoid regressions.
10012     if (isNullOrNullSplat(RHS)) {
10013       EVT NarrowVT = LHS.getValueType();
10014       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
10015       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
10016       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
10017       unsigned WideWidth = WideVT.getScalarSizeInBits();
10018       bool IsSigned = isSignedIntSetCC(CC);
10019       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10020       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10021           SetCCWidth != 1 && SetCCWidth < WideWidth &&
10022           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
10023           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
10024         // Both compare operands can be widened for free. The LHS can use an
10025         // extended load, and the RHS is a constant:
10026         //   vselect (ext (setcc load(X), C)), N1, N2 -->
10027         //   vselect (setcc extload(X), C'), N1, N2
10028         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10029         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10030         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10031         EVT WideSetCCVT = getSetCCResultType(WideVT);
10032         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
10033         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10034       }
10035     }
10036 
10037     // Match VSELECTs into add with unsigned saturation.
10038     if (hasOperation(ISD::UADDSAT, VT)) {
      // Check if one of the arms of the VSELECT is a vector with all bits set.
      // If it's on the left side, invert the predicate to simplify the logic
      // below.
10041       SDValue Other;
10042       ISD::CondCode SatCC = CC;
10043       if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
10044         Other = N2;
10045         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10046       } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
10047         Other = N1;
10048       }
10049 
10050       if (Other && Other.getOpcode() == ISD::ADD) {
10051         SDValue CondLHS = LHS, CondRHS = RHS;
10052         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10053 
10054         // Canonicalize condition operands.
10055         if (SatCC == ISD::SETUGE) {
10056           std::swap(CondLHS, CondRHS);
10057           SatCC = ISD::SETULE;
10058         }
10059 
10060         // We can test against either of the addition operands.
10061         // x <= x+y ? x+y : ~0 --> uaddsat x, y
10062         // x+y >= x ? x+y : ~0 --> uaddsat x, y
10063         if (SatCC == ISD::SETULE && Other == CondRHS &&
10064             (OpLHS == CondLHS || OpRHS == CondLHS))
10065           return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10066 
10067         if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
10068             (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10069              OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
10070             CondLHS == OpLHS) {
          // If the RHS is a constant, we have to reverse the const
          // canonicalization.
10073           // x >= ~C ? x+C : ~0 --> uaddsat x, C
10074           auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10075             return Cond->getAPIntValue() == ~Op->getAPIntValue();
10076           };
10077           if (SatCC == ISD::SETULE &&
10078               ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
10079             return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10080         }
10081       }
10082     }
10083 
10084     // Match VSELECTs into sub with unsigned saturation.
10085     if (hasOperation(ISD::USUBSAT, VT)) {
      // Check if one of the arms of the VSELECT is a zero vector. If it's on
      // the left side, invert the predicate to simplify the logic below.
10088       SDValue Other;
10089       ISD::CondCode SatCC = CC;
10090       if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
10091         Other = N2;
10092         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10093       } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
10094         Other = N1;
10095       }
10096 
10097       if (Other && Other.getNumOperands() == 2) {
10098         SDValue CondRHS = RHS;
10099         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10100 
10101         if (Other.getOpcode() == ISD::SUB &&
10102             LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
10103             OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
10104           // Look for a general sub with unsigned saturation first.
10105           // zext(x) >= y ? x - trunc(y) : 0
10106           // --> usubsat(x,trunc(umin(y,SatLimit)))
10107           // zext(x) >  y ? x - trunc(y) : 0
10108           // --> usubsat(x,trunc(umin(y,SatLimit)))
10109           if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
10110             return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
10111                                        DL);
10112         }
10113 
10114         if (OpLHS == LHS) {
10115           // Look for a general sub with unsigned saturation first.
10116           // x >= y ? x-y : 0 --> usubsat x, y
10117           // x >  y ? x-y : 0 --> usubsat x, y
10118           if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
10119               Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
10120             return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10121 
10122           if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10123               OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10124             if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
10125                 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
              // If the RHS is a constant, we have to reverse the const
              // canonicalization.
              // x > C-1 ? x+(-C) : 0 --> usubsat x, C
10129               auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10130                 return (!Op && !Cond) ||
10131                        (Op && Cond &&
10132                         Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
10133               };
10134               if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
10135                   ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
10136                                             /*AllowUndefs*/ true)) {
10137                 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
10138                                     DAG.getConstant(0, DL, VT), OpRHS);
10139                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10140               }
10141 
10142               // Another special case: If C was a sign bit, the sub has been
10143               // canonicalized into a xor.
10144               // FIXME: Would it be better to use computeKnownBits to determine
10145               //        whether it's safe to decanonicalize the xor?
10146               // x s< 0 ? x^C : 0 --> usubsat x, C
10147               APInt SplatValue;
10148               if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
10149                   ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
10150                   ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
10151                   SplatValue.isSignMask()) {
10152                 // Note that we have to rebuild the RHS constant here to
10153                 // ensure we don't rely on particular values of undef lanes.
10154                 OpRHS = DAG.getConstant(SplatValue, DL, VT);
10155                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10156               }
10157             }
10158           }
10159         }
10160       }
10161     }
10162   }
10163 
10164   if (SimplifySelectOps(N, N1, N2))
10165     return SDValue(N, 0);  // Don't revisit N.
10166 
10167   // Fold (vselect all_ones, N1, N2) -> N1
10168   if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
10169     return N1;
10170   // Fold (vselect all_zeros, N1, N2) -> N2
10171   if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
10172     return N2;
10173 
  // The ConvertSelectToConcatVector function assumes both the above
  // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
  // and addressed.
10177   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
10178       N2.getOpcode() == ISD::CONCAT_VECTORS &&
10179       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
10180     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
10181       return CV;
10182   }
10183 
10184   if (SDValue V = foldVSelectOfConstants(N))
10185     return V;
10186 
10187   return SDValue();
10188 }
10189 
10190 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
10191   SDValue N0 = N->getOperand(0);
10192   SDValue N1 = N->getOperand(1);
10193   SDValue N2 = N->getOperand(2);
10194   SDValue N3 = N->getOperand(3);
10195   SDValue N4 = N->getOperand(4);
10196   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
10197 
10198   // fold select_cc lhs, rhs, x, x, cc -> x
10199   if (N2 == N3)
10200     return N2;
10201 
10202   // Determine if the condition we're dealing with is constant
10203   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
10204                                   CC, SDLoc(N), false)) {
10205     AddToWorklist(SCC.getNode());
10206 
10207     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
10208       if (!SCCC->isZero())
10209         return N2;    // cond always true -> true val
10210       else
10211         return N3;    // cond always false -> false val
10212     } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // consistent with DAG creation, where no setcc node is created in this
      // case.
10215       return N2;
10216     } else if (SCC.getOpcode() == ISD::SETCC) {
10217       // Fold to a simpler select_cc
10218       SDValue SelectOp = DAG.getNode(
10219           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
10220           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
10221       SelectOp->setFlags(SCC->getFlags());
10222       return SelectOp;
10223     }
10224   }
10225 
10226   // If we can fold this based on the true/false value, do so.
10227   if (SimplifySelectOps(N, N2, N3))
10228     return SDValue(N, 0);  // Don't revisit N.
10229 
10230   // fold select_cc into other things, such as min/max/abs
10231   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
10232 }
10233 
10234 SDValue DAGCombiner::visitSETCC(SDNode *N) {
  // setcc is very commonly used as an argument to brcond. This pattern
  // also lends itself to numerous combines and, as a result, it is desirable
  // to keep the argument to a brcond as a setcc as much as possible.
10238   bool PreferSetCC =
10239       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10240 
10241   ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10242   EVT VT = N->getValueType(0);
10243 
10244   //   SETCC(FREEZE(X), CONST, Cond)
10245   // =>
10246   //   FREEZE(SETCC(X, CONST, Cond))
10247   // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
10248   // isn't equivalent to true or false.
10249   // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
10250   // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
10251   //
10252   // This transformation is beneficial because visitBRCOND can fold
10253   // BRCOND(FREEZE(X)) to BRCOND(X).
10254 
10255   // Conservatively optimize integer comparisons only.
10256   if (PreferSetCC) {
10257     // Do this only when SETCC is going to be used by BRCOND.
10258 
10259     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
10260     ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
10261     ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
10262     bool Updated = false;
10263 
10264     // Is 'X Cond C' always true or false?
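    // For example (illustrative): 'X ult 0' is always false and 'X uge 0' is
    // always true, for any X.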
10265     auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
10266       bool False = (Cond == ISD::SETULT && C->isZero()) ||
10267                    (Cond == ISD::SETLT  && C->isMinSignedValue()) ||
10268                    (Cond == ISD::SETUGT && C->isAllOnes()) ||
10269                    (Cond == ISD::SETGT  && C->isMaxSignedValue());
10270       bool True =  (Cond == ISD::SETULE && C->isAllOnes()) ||
10271                    (Cond == ISD::SETLE  && C->isMaxSignedValue()) ||
10272                    (Cond == ISD::SETUGE && C->isZero()) ||
10273                    (Cond == ISD::SETGE  && C->isMinSignedValue());
10274       return True || False;
10275     };
10276 
10277     if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
10278       if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
10279         N0 = N0->getOperand(0);
10280         Updated = true;
10281       }
10282     }
10283     if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
10284       if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
10285                                N0C)) {
10286         N1 = N1->getOperand(0);
10287         Updated = true;
10288       }
10289     }
10290 
10291     if (Updated)
10292       return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
10293   }
10294 
10295   SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
10296                                    SDLoc(N), !PreferSetCC);
10297 
10298   if (!Combined)
10299     return SDValue();
10300 
  // If we prefer to have a setcc and the combined result is not one, try our
  // best to recreate one using rebuildSetCC.
10303   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
10304     SDValue NewSetCC = rebuildSetCC(Combined);
10305 
10306     // We don't have anything interesting to combine to.
10307     if (NewSetCC.getNode() == N)
10308       return SDValue();
10309 
10310     if (NewSetCC)
10311       return NewSetCC;
10312   }
10313 
10314   return Combined;
10315 }
10316 
10317 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
10318   SDValue LHS = N->getOperand(0);
10319   SDValue RHS = N->getOperand(1);
10320   SDValue Carry = N->getOperand(2);
10321   SDValue Cond = N->getOperand(3);
10322 
10323   // If Carry is false, fold to a regular SETCC.
10324   if (isNullConstant(Carry))
10325     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
10326 
10327   return SDValue();
10328 }
10329 
/// Check that N satisfies the following:
///   N is used once.
///   N is a load.
///   The load is compatible with ExtOpcode: if the load has an explicit
///   zero/sign extension, ExtOpcode must be the matching extension;
///   otherwise any extension opcode is compatible.
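/// For example (illustrative): a SEXTLOAD is compatible only with
/// ISD::SIGN_EXTEND, while a NON_EXTLOAD or EXTLOAD is compatible with any
/// extension opcode.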
10337 static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
10338   if (!N.hasOneUse())
10339     return false;
10340 
10341   if (!isa<LoadSDNode>(N))
10342     return false;
10343 
10344   LoadSDNode *Load = cast<LoadSDNode>(N);
10345   ISD::LoadExtType LoadExt = Load->getExtensionType();
10346   if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
10347     return true;
10348 
  // Now LoadExt is either SEXTLOAD or ZEXTLOAD; ExtOpcode must be the matching
  // extension.
10351   if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
10352       (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
10353     return false;
10354 
10355   return true;
10356 }
10357 
10358 /// Fold
10359 ///   (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
10360 ///   (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
10361 ///   (aext (select c, load x, load y)) -> (select c, extload x, extload y)
10362 /// This function is called by the DAGCombiner when visiting sext/zext/aext
10363 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10364 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
10365                                          SelectionDAG &DAG) {
10366   unsigned Opcode = N->getOpcode();
10367   SDValue N0 = N->getOperand(0);
10368   EVT VT = N->getValueType(0);
10369   SDLoc DL(N);
10370 
10371   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10372           Opcode == ISD::ANY_EXTEND) &&
10373          "Expected EXTEND dag node in input!");
10374 
10375   if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
10376       !N0.hasOneUse())
10377     return SDValue();
10378 
10379   SDValue Op1 = N0->getOperand(1);
10380   SDValue Op2 = N0->getOperand(2);
10381   if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
10382     return SDValue();
10383 
10384   auto ExtLoadOpcode = ISD::EXTLOAD;
10385   if (Opcode == ISD::SIGN_EXTEND)
10386     ExtLoadOpcode = ISD::SEXTLOAD;
10387   else if (Opcode == ISD::ZERO_EXTEND)
10388     ExtLoadOpcode = ISD::ZEXTLOAD;
10389 
10390   LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
10391   LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
10392   if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
10393       !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
10394     return SDValue();
10395 
10396   SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
10397   SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
10398   return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
10399 }
10400 
10401 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
10402 /// a build_vector of constants.
10403 /// This function is called by the DAGCombiner when visiting sext/zext/aext
10404 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10405 /// Vector extends are not folded if operations are legal; this is to
10406 /// avoid introducing illegal build_vector dag nodes.
10407 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
10408                                          SelectionDAG &DAG, bool LegalTypes) {
10409   unsigned Opcode = N->getOpcode();
10410   SDValue N0 = N->getOperand(0);
10411   EVT VT = N->getValueType(0);
10412   SDLoc DL(N);
10413 
10414   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10415          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
10416          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
10417          && "Expected EXTEND dag node in input!");
10418 
10419   // fold (sext c1) -> c1
10420   // fold (zext c1) -> c1
10421   // fold (aext c1) -> c1
10422   if (isa<ConstantSDNode>(N0))
10423     return DAG.getNode(Opcode, DL, VT, N0);
10424 
10425   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10426   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
10427   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10428   if (N0->getOpcode() == ISD::SELECT) {
10429     SDValue Op1 = N0->getOperand(1);
10430     SDValue Op2 = N0->getOperand(2);
10431     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
10432         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
      // For any_extend, choose sign extension of the constants to allow a
      // possible further transform to sign_extend_inreg, i.e.:
10435       //
10436       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
10437       // t2: i64 = any_extend t1
10438       // -->
10439       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
10440       // -->
10441       // t4: i64 = sign_extend_inreg t3
10442       unsigned FoldOpc = Opcode;
10443       if (FoldOpc == ISD::ANY_EXTEND)
10444         FoldOpc = ISD::SIGN_EXTEND;
10445       return DAG.getSelect(DL, VT, N0->getOperand(0),
10446                            DAG.getNode(FoldOpc, DL, VT, Op1),
10447                            DAG.getNode(FoldOpc, DL, VT, Op2));
10448     }
10449   }
10450 
  // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
10454   EVT SVT = VT.getScalarType();
10455   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
10456       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
10457     return SDValue();
10458 
10459   // We can fold this node into a build_vector.
10460   unsigned VTBits = SVT.getSizeInBits();
10461   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
10462   SmallVector<SDValue, 8> Elts;
10463   unsigned NumElts = VT.getVectorNumElements();
10464 
  // For zero-extensions, UNDEF elements are still guaranteed to have their
  // upper bits set to zero.
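  // For example (illustrative): (zext (build_vector i8 undef, i8 7) to v2i16)
  // may be folded to (build_vector i16 0, i16 7).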
10467   bool IsZext =
10468       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
10469 
10470   for (unsigned i = 0; i != NumElts; ++i) {
10471     SDValue Op = N0.getOperand(i);
10472     if (Op.isUndef()) {
10473       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
10474       continue;
10475     }
10476 
10477     SDLoc DL(Op);
    // Get the constant value and, if needed, truncate it to the size of the
    // type. Nodes like build_vector might have constants wider than the
    // scalar type.
10480     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
10481     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
10482       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
10483     else
10484       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
10485   }
10486 
10487   return DAG.getBuildVector(VT, DL, Elts);
10488 }
10489 
// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extending the uses is possible and the
// above-mentioned transformation is profitable.
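// For example (an illustrative sketch; an i16 load sign-extended to i32):
//   t1: i16 = load x
//   t2: i32 = sign_extend t1
//   t3: i1  = setcc t1, Constant:i16<0>, setlt
// can become:
//   t4: i32 = sextload x
//   t5: i1  = setcc t4, Constant:i32<0>, setlt
// where the remaining uses of t1 become (truncate t4). SETCC users whose
// operands must be extended are collected into ExtendNodes.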
10494 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
10495                                     unsigned ExtOpc,
10496                                     SmallVectorImpl<SDNode *> &ExtendNodes,
10497                                     const TargetLowering &TLI) {
10498   bool HasCopyToRegUses = false;
10499   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
10500   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
10501                             UE = N0.getNode()->use_end();
10502        UI != UE; ++UI) {
10503     SDNode *User = *UI;
10504     if (User == N)
10505       continue;
10506     if (UI.getUse().getResNo() != N0.getResNo())
10507       continue;
10508     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
10509     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
10510       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
10511       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
10512         // Sign bits will be lost after a zext.
10513         return false;
10514       bool Add = false;
10515       for (unsigned i = 0; i != 2; ++i) {
10516         SDValue UseOp = User->getOperand(i);
10517         if (UseOp == N0)
10518           continue;
10519         if (!isa<ConstantSDNode>(UseOp))
10520           return false;
10521         Add = true;
10522       }
10523       if (Add)
10524         ExtendNodes.push_back(User);
10525       continue;
10526     }
10527     // If truncates aren't free and there are users we can't
10528     // extend, it isn't worthwhile.
10529     if (!isTruncFree)
10530       return false;
10531     // Remember if this value is live-out.
10532     if (User->getOpcode() == ISD::CopyToReg)
10533       HasCopyToRegUses = true;
10534   }
10535 
10536   if (HasCopyToRegUses) {
10537     bool BothLiveOut = false;
10538     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
10539          UI != UE; ++UI) {
10540       SDUse &Use = UI.getUse();
10541       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
10542         BothLiveOut = true;
10543         break;
10544       }
10545     }
10546     if (BothLiveOut)
10547       // Both unextended and extended values are live out. There had better be
10548       // a good reason for the transformation.
      return !ExtendNodes.empty();
10550   }
10551   return true;
10552 }
10553 
10554 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
10555                                   SDValue OrigLoad, SDValue ExtLoad,
10556                                   ISD::NodeType ExtType) {
10557   // Extend SetCC uses if necessary.
10558   SDLoc DL(ExtLoad);
10559   for (SDNode *SetCC : SetCCs) {
10560     SmallVector<SDValue, 4> Ops;
10561 
10562     for (unsigned j = 0; j != 2; ++j) {
10563       SDValue SOp = SetCC->getOperand(j);
10564       if (SOp == OrigLoad)
10565         Ops.push_back(ExtLoad);
10566       else
10567         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10568     }
10569 
10570     Ops.push_back(SetCC->getOperand(2));
10571     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10572   }
10573 }
10574 
10575 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
10576 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
10577   SDValue N0 = N->getOperand(0);
10578   EVT DstVT = N->getValueType(0);
10579   EVT SrcVT = N0.getValueType();
10580 
10581   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10582           N->getOpcode() == ISD::ZERO_EXTEND) &&
10583          "Unexpected node type (not an extend)!");
10584 
10585   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
10586   // For example, on a target with legal v4i32, but illegal v8i32, turn:
10587   //   (v8i32 (sext (v8i16 (load x))))
10588   // into:
10589   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
10590   //                          (v4i32 (sextload (x + 16)))))
10591   // Where uses of the original load, i.e.:
10592   //   (v8i16 (load x))
10593   // are replaced with:
10594   //   (v8i16 (truncate
10595   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
10596   //                            (v4i32 (sextload (x + 16)))))))
10597   //
10598   // This combine is only applicable to illegal, but splittable, vectors.
10599   // All legal types, and illegal non-vector types, are handled elsewhere.
10600   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
10601   //
10602   if (N0->getOpcode() != ISD::LOAD)
10603     return SDValue();
10604 
10605   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10606 
10607   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
10608       !N0.hasOneUse() || !LN0->isSimple() ||
10609       !DstVT.isVector() || !DstVT.isPow2VectorType() ||
10610       !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10611     return SDValue();
10612 
10613   SmallVector<SDNode *, 4> SetCCs;
10614   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
10615     return SDValue();
10616 
10617   ISD::LoadExtType ExtType =
10618       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10619 
10620   // Try to split the vector types to get down to legal types.
10621   EVT SplitSrcVT = SrcVT;
10622   EVT SplitDstVT = DstVT;
10623   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
10624          SplitSrcVT.getVectorNumElements() > 1) {
10625     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
10626     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
10627   }
10628 
10629   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
10630     return SDValue();
10631 
10632   assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
10633 
10634   SDLoc DL(N);
10635   const unsigned NumSplits =
10636       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
10637   const unsigned Stride = SplitSrcVT.getStoreSize();
10638   SmallVector<SDValue, 4> Loads;
10639   SmallVector<SDValue, 4> Chains;
10640 
10641   SDValue BasePtr = LN0->getBasePtr();
10642   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
10643     const unsigned Offset = Idx * Stride;
10644     const Align Align = commonAlignment(LN0->getAlign(), Offset);
10645 
10646     SDValue SplitLoad = DAG.getExtLoad(
10647         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
10648         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
10649         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10650 
10651     BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
10652 
10653     Loads.push_back(SplitLoad.getValue(0));
10654     Chains.push_back(SplitLoad.getValue(1));
10655   }
10656 
10657   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
10658   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
10659 
  // Simplify the new TokenFactor.
10661   AddToWorklist(NewChain.getNode());
10662 
10663   CombineTo(N, NewValue);
10664 
10665   // Replace uses of the original load (before extension)
10666   // with a truncate of the concatenated sextloaded vectors.
10667   SDValue Trunc =
10668       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
10669   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
10670   CombineTo(N0.getNode(), Trunc, NewChain);
10671   return SDValue(N, 0); // Return N so it doesn't get rechecked!
10672 }
10673 
10674 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10675 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
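// For example (an illustrative sketch; an i16 value zero-extended to i32):
//   (i32 (zext (i16 (and (i16 (srl (i16 (load x)), 4)), 255))))
// -->
//   (i32 (and (i32 (srl (i32 (zextload x, i16)), 4)), 255))
// Both forms compute ((x >> 4) & 255) with zeros in the upper bits.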
10676 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
10677   assert(N->getOpcode() == ISD::ZERO_EXTEND);
10678   EVT VT = N->getValueType(0);
10679   EVT OrigVT = N->getOperand(0).getValueType();
10680   if (TLI.isZExtFree(OrigVT, VT))
10681     return SDValue();
10682 
10683   // and/or/xor
10684   SDValue N0 = N->getOperand(0);
10685   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10686         N0.getOpcode() == ISD::XOR) ||
10687       N0.getOperand(1).getOpcode() != ISD::Constant ||
10688       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
10689     return SDValue();
10690 
10691   // shl/shr
10692   SDValue N1 = N0->getOperand(0);
10693   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
10694       N1.getOperand(1).getOpcode() != ISD::Constant ||
10695       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
10696     return SDValue();
10697 
10698   // load
10699   if (!isa<LoadSDNode>(N1.getOperand(0)))
10700     return SDValue();
10701   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
10702   EVT MemVT = Load->getMemoryVT();
10703   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
10704       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
10705     return SDValue();
10706 
10708   // If the shift op is SHL, the logic op must be AND, otherwise the result
10709   // will be wrong.
10710   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
10711     return SDValue();
10712 
10713   if (!N0.hasOneUse() || !N1.hasOneUse())
10714     return SDValue();
10715 
10716   SmallVector<SDNode*, 4> SetCCs;
10717   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
10718                                ISD::ZERO_EXTEND, SetCCs, TLI))
10719     return SDValue();
10720 
10721   // Actually do the transformation.
10722   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
10723                                    Load->getChain(), Load->getBasePtr(),
10724                                    Load->getMemoryVT(), Load->getMemOperand());
10725 
10726   SDLoc DL1(N1);
10727   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
10728                               N1.getOperand(1));
10729 
10730   APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10731   SDLoc DL0(N0);
10732   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
10733                             DAG.getConstant(Mask, DL0, VT));
10734 
10735   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
10736   CombineTo(N, And);
10737   if (SDValue(Load, 0).hasOneUse()) {
10738     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
10739   } else {
10740     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
10741                                 Load->getValueType(0), ExtLoad);
10742     CombineTo(Load, Trunc, ExtLoad.getValue(1));
10743   }
10744 
10745   // N0 is dead at this point.
10746   recursivelyDeleteUnusedNodes(N0.getNode());
10747 
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
10749 }
10750 
10751 /// If we're narrowing or widening the result of a vector select and the final
10752 /// size is the same size as a setcc (compare) feeding the select, then try to
10753 /// apply the cast operation to the select's operands because matching vector
10754 /// sizes for a select condition and other operands should be more efficient.
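/// For example (an illustrative sketch, assuming the target's setcc result
/// type for v4i32 operands is v4i32):
///   sext (vselect (setcc v4i32 X, Y), v4i16 A, v4i16 B) to v4i32
/// -->
///   vselect (setcc v4i32 X, Y), (sext A to v4i32), (sext B to v4i32)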
10755 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
10756   unsigned CastOpcode = Cast->getOpcode();
10757   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
10758           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
10759           CastOpcode == ISD::FP_ROUND) &&
10760          "Unexpected opcode for vector select narrowing/widening");
10761 
10762   // We only do this transform before legal ops because the pattern may be
10763   // obfuscated by target-specific operations after legalization. Do not create
10764   // an illegal select op, however, because that may be difficult to lower.
10765   EVT VT = Cast->getValueType(0);
10766   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
10767     return SDValue();
10768 
10769   SDValue VSel = Cast->getOperand(0);
10770   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
10771       VSel.getOperand(0).getOpcode() != ISD::SETCC)
10772     return SDValue();
10773 
10774   // Does the setcc have the same vector size as the casted select?
10775   SDValue SetCC = VSel.getOperand(0);
10776   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
10777   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
10778     return SDValue();
10779 
10780   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
10781   SDValue A = VSel.getOperand(1);
10782   SDValue B = VSel.getOperand(2);
10783   SDValue CastA, CastB;
10784   SDLoc DL(Cast);
10785   if (CastOpcode == ISD::FP_ROUND) {
10786     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
10787     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
10788     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
10789   } else {
10790     CastA = DAG.getNode(CastOpcode, DL, VT, A);
10791     CastB = DAG.getNode(CastOpcode, DL, VT, B);
10792   }
10793   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
10794 }
10795 
10796 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10797 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
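// For example (an illustrative sketch; an i8 value in memory):
//   (i64 (sext (i32 (sextload x, i8))))
// is replaced with a single wider extending load:
//   (i64 (sextload x, i8))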
10798 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
10799                                      const TargetLowering &TLI, EVT VT,
10800                                      bool LegalOperations, SDNode *N,
10801                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
10802   SDNode *N0Node = N0.getNode();
10803   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
10804                                                    : ISD::isZEXTLoad(N0Node);
10805   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
10806       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
10807     return SDValue();
10808 
10809   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10810   EVT MemVT = LN0->getMemoryVT();
10811   if ((LegalOperations || !LN0->isSimple() ||
10812        VT.isVector()) &&
10813       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
10814     return SDValue();
10815 
10816   SDValue ExtLoad =
10817       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10818                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
10819   Combiner.CombineTo(N, ExtLoad);
10820   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10821   if (LN0->use_empty())
10822     Combiner.recursivelyDeleteUnusedNodes(LN0);
10823   return SDValue(N, 0); // Return N so it doesn't get rechecked!
10824 }
10825 
10826 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10827 // Only generate vector extloads when 1) they're legal, and 2) they are
10828 // deemed desirable by the target.
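// For example (an illustrative sketch):
//   (i32 (zext (i8 (load x)))) -> (i32 (zextload x, i8))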
10829 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
10830                                   const TargetLowering &TLI, EVT VT,
10831                                   bool LegalOperations, SDNode *N, SDValue N0,
10832                                   ISD::LoadExtType ExtLoadType,
10833                                   ISD::NodeType ExtOpc) {
10834   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
10835       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
10836       ((LegalOperations || VT.isVector() ||
10837         !cast<LoadSDNode>(N0)->isSimple()) &&
10838        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
10839     return {};
10840 
10841   bool DoXform = true;
10842   SmallVector<SDNode *, 4> SetCCs;
10843   if (!N0.hasOneUse())
10844     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
10845   if (VT.isVector())
10846     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
10847   if (!DoXform)
10848     return {};
10849 
10850   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10851   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10852                                    LN0->getBasePtr(), N0.getValueType(),
10853                                    LN0->getMemOperand());
10854   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
10855   // If the load value is used only by N, replace it via CombineTo N.
10856   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
10857   Combiner.CombineTo(N, ExtLoad);
10858   if (NoReplaceTrunc) {
10859     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10860     Combiner.recursivelyDeleteUnusedNodes(LN0);
10861   } else {
10862     SDValue Trunc =
10863         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
10864     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10865   }
10866   return SDValue(N, 0); // Return N so it doesn't get rechecked!
10867 }
10868 
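// fold ([s|z]ext (masked_load x)) -> ([s|z]ext-masked_load x)
// The pass-through operand is extended to the new result type so that
// masked-off lanes still produce the (extended) original values.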
10869 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
10870                                         const TargetLowering &TLI, EVT VT,
10871                                         SDNode *N, SDValue N0,
10872                                         ISD::LoadExtType ExtLoadType,
10873                                         ISD::NodeType ExtOpc) {
10874   if (!N0.hasOneUse())
10875     return SDValue();
10876 
10877   MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
10878   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
10879     return SDValue();
10880 
10881   if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
10882     return SDValue();
10883 
10884   if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10885     return SDValue();
10886 
10887   SDLoc dl(Ld);
10888   SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
10889   SDValue NewLoad = DAG.getMaskedLoad(
10890       VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
10891       PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
10892       ExtLoadType, Ld->isExpandingLoad());
10893   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
10894   return NewLoad;
10895 }
10896 
10897 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
10898                                        bool LegalOperations) {
10899   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10900           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
10901 
10902   SDValue SetCC = N->getOperand(0);
10903   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
10904       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
10905     return SDValue();
10906 
10907   SDValue X = SetCC.getOperand(0);
10908   SDValue Ones = SetCC.getOperand(1);
10909   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
10910   EVT VT = N->getValueType(0);
10911   EVT XVT = X.getValueType();
10912   // setge X, C is canonicalized to setgt, so we do not need to match that
10913   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
10914   // not require the 'not' op.
10915   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
10916     // Invert and smear/shift the sign bit:
10917     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
10918     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
10919     SDLoc DL(N);
10920     unsigned ShCt = VT.getSizeInBits() - 1;
10921     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10922     if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
10923       SDValue NotX = DAG.getNOT(DL, X, VT);
10924       SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
10925       auto ShiftOpcode =
10926         N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
10927       return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
10928     }
10929   }
10930   return SDValue();
10931 }
10932 
10933 SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
10934   SDValue N0 = N->getOperand(0);
10935   if (N0.getOpcode() != ISD::SETCC)
10936     return SDValue();
10937 
10938   SDValue N00 = N0.getOperand(0);
10939   SDValue N01 = N0.getOperand(1);
10940   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10941   EVT VT = N->getValueType(0);
10942   EVT N00VT = N00.getValueType();
10943   SDLoc DL(N);
10944 
10945   // On some architectures (such as SSE/NEON/etc) the SETCC result type is
10946   // the same size as the compared operands. Try to optimize sext(setcc())
10947   // if this is the case.
10948   if (VT.isVector() && !LegalOperations &&
10949       TLI.getBooleanContents(N00VT) ==
10950           TargetLowering::ZeroOrNegativeOneBooleanContent) {
10951     EVT SVT = getSetCCResultType(N00VT);
10952 
10953     // If we already have the desired type, don't change it.
10954     if (SVT != N0.getValueType()) {
10955       // We know that the # elements of the results is the same as the
10956       // # elements of the compare (and the # elements of the compare result
10957       // for that matter).  Check to see that they are the same size.  If so,
10958       // we know that the element size of the sext'd result matches the
10959       // element size of the compare operands.
10960       if (VT.getSizeInBits() == SVT.getSizeInBits())
10961         return DAG.getSetCC(DL, VT, N00, N01, CC);
10962 
10963       // If the desired elements are smaller or larger than the source
10964       // elements, we can use a matching integer vector type and then
10965       // truncate/sign extend.
10966       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
10967       if (SVT == MatchingVecType) {
10968         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
10969         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
10970       }
10971     }
10972 
10973     // Try to eliminate the sext of a setcc by zexting the compare operands.
10974     if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
10975         !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
10976       bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
10977       unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10978       unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10979 
10980       // We have an unsupported narrow vector compare op that would be legal
10981       // if extended to the destination type. See if the compare operands
10982       // can be freely extended to the destination type.
10983       auto IsFreeToExtend = [&](SDValue V) {
10984         if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
10985           return true;
10986         // Match a simple, non-extended load that can be converted to a
10987         // legal {z/s}ext-load.
10988         // TODO: Allow widening of an existing {z/s}ext-load?
10989         if (!(ISD::isNON_EXTLoad(V.getNode()) &&
10990               ISD::isUNINDEXEDLoad(V.getNode()) &&
10991               cast<LoadSDNode>(V)->isSimple() &&
10992               TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
10993           return false;
10994 
10995         // Non-chain users of this value must either be the setcc in this
10996         // sequence or extends that can be folded into the new {z/s}ext-load.
10997         for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
10998              UI != UE; ++UI) {
10999           // Skip uses of the chain and the setcc.
11000           SDNode *User = *UI;
11001           if (UI.getUse().getResNo() != 0 || User == N0.getNode())
11002             continue;
11003           // Extra users must have exactly the same cast we are about to create.
11004           // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
11005           //       is enhanced similarly.
11006           if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
11007             return false;
11008         }
11009         return true;
11010       };
11011 
11012       if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
11013         SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
11014         SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
11015         return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
11016       }
11017     }
11018   }
11019 
11020   // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
11021   // Here, T can be 1 or -1, depending on the type of the setcc and
11022   // getBooleanContents().
11023   unsigned SetCCWidth = N0.getScalarValueSizeInBits();
11024 
11025   // To determine the "true" side of the select, we need to know the high bit
11026   // of the value returned by the setcc if it evaluates to true.
11027   // If the type of the setcc is i1, then the true case of the select is just
11028   // sext(i1 1), that is, -1.
11029   // If the type of the setcc is larger (say, i8) then the value of the high
11030   // bit depends on getBooleanContents(), so ask TLI for a real "true" value
11031   // of the appropriate width.
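  // For example (an illustrative sketch, assuming an i8 setcc type with
  // ZeroOrNegativeOneBooleanContent, so the "true" value is -1):
  //   (i32 (sext (i8 (setcc x, y, cc)))) -> (select_cc x, y, -1, 0, cc)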
11032   SDValue ExtTrueVal = (SetCCWidth == 1)
11033                            ? DAG.getAllOnesConstant(DL, VT)
11034                            : DAG.getBoolConstant(true, DL, VT, N00VT);
11035   SDValue Zero = DAG.getConstant(0, DL, VT);
11036   if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
11037     return SCC;
11038 
11039   if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
11040     EVT SetCCVT = getSetCCResultType(N00VT);
11041     // Don't do this transform for i1 because there's a select transform
11042     // that would reverse it.
11043     // TODO: We should not do this transform at all without a target hook
11044     // because a sext is likely cheaper than a select?
11045     if (SetCCVT.getScalarSizeInBits() != 1 &&
11046         (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
11047       SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
11048       return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
11049     }
11050   }
11051 
11052   return SDValue();
11053 }
11054 
11055 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
11056   SDValue N0 = N->getOperand(0);
11057   EVT VT = N->getValueType(0);
11058   SDLoc DL(N);
11059 
11060   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11061     return Res;
11062 
11063   // fold (sext (sext x)) -> (sext x)
11064   // fold (sext (aext x)) -> (sext x)
11065   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11066     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
11067 
11068   if (N0.getOpcode() == ISD::TRUNCATE) {
11069     // fold (sext (truncate (load x))) -> (sext (smaller load x))
11070     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
11071     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11072       SDNode *oye = N0.getOperand(0).getNode();
11073       if (NarrowLoad.getNode() != N0.getNode()) {
11074         CombineTo(N0.getNode(), NarrowLoad);
11075         // CombineTo deleted the truncate, if needed, but not what's under it.
11076         AddToWorklist(oye);
11077       }
11078       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11079     }
11080 
11081     // See if the value being truncated is already sign extended.  If so, just
11082     // eliminate the trunc/sext pair.
11083     SDValue Op = N0.getOperand(0);
11084     unsigned OpBits   = Op.getScalarValueSizeInBits();
11085     unsigned MidBits  = N0.getScalarValueSizeInBits();
11086     unsigned DestBits = VT.getScalarSizeInBits();
11087     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
11088 
11089     if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already sign-extended and can be used directly.
11092       if (NumSignBits > DestBits-MidBits)
11093         return Op;
11094     } else if (OpBits < DestBits) {
11095       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
11096       // bits, just sext from i32.
11097       if (NumSignBits > OpBits-MidBits)
11098         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
11099     } else {
11100       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
11101       // bits, just truncate to i32.
11102       if (NumSignBits > OpBits-MidBits)
11103         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
11104     }
11105 
11106     // fold (sext (truncate x)) -> (sextinreg x).
11107     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
11108                                                  N0.getValueType())) {
11109       if (OpBits < DestBits)
11110         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
11111       else if (OpBits > DestBits)
11112         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
11113       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
11114                          DAG.getValueType(N0.getValueType()));
11115     }
11116   }
11117 
11118   // Try to simplify (sext (load x)).
11119   if (SDValue foldedExt =
11120           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11121                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
11122     return foldedExt;
11123 
11124   if (SDValue foldedExt =
11125       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
11126                                ISD::SIGN_EXTEND))
11127     return foldedExt;
11128 
11129   // fold (sext (load x)) to multiple smaller sextloads.
11130   // Only on illegal but splittable vectors.
11131   if (SDValue ExtLoad = CombineExtLoad(N))
11132     return ExtLoad;
11133 
11134   // Try to simplify (sext (sextload x)).
11135   if (SDValue foldedExt = tryToFoldExtOfExtload(
11136           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
11137     return foldedExt;
11138 
11139   // fold (sext (and/or/xor (load x), cst)) ->
11140   //      (and/or/xor (sextload x), (sext cst))
11141   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11142        N0.getOpcode() == ISD::XOR) &&
11143       isa<LoadSDNode>(N0.getOperand(0)) &&
11144       N0.getOperand(1).getOpcode() == ISD::Constant &&
11145       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11146     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11147     EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
11150       SmallVector<SDNode*, 4> SetCCs;
11151       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11152                                              ISD::SIGN_EXTEND, SetCCs, TLI);
11153       if (DoXform) {
11154         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
11155                                          LN00->getChain(), LN00->getBasePtr(),
11156                                          LN00->getMemoryVT(),
11157                                          LN00->getMemOperand());
11158         APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
11159         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11160                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
11161         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
11162         bool NoReplaceTruncAnd = !N0.hasOneUse();
11163         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11164         CombineTo(N, And);
11165         // If N0 has multiple uses, change other uses as well.
11166         if (NoReplaceTruncAnd) {
11167           SDValue TruncAnd =
11168               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11169           CombineTo(N0.getNode(), TruncAnd);
11170         }
11171         if (NoReplaceTrunc) {
11172           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11173         } else {
11174           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11175                                       LN00->getValueType(0), ExtLoad);
11176           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11177         }
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
11179       }
11180     }
11181   }
11182 
11183   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11184     return V;
11185 
11186   if (SDValue V = foldSextSetcc(N))
11187     return V;
11188 
11189   // fold (sext x) -> (zext x) if the sign bit is known zero.
11190   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
11191       DAG.SignBitIsZero(N0))
11192     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
11193 
11194   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11195     return NewVSel;
11196 
11197   // Eliminate this sign extend by doing a negation in the destination type:
11198   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
11199   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
11200       isNullOrNullSplat(N0.getOperand(0)) &&
11201       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
11202       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
11203     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
11204     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
11205   }
11206   // Eliminate this sign extend by doing a decrement in the destination type:
11207   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
11208   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
11209       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
11210       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11211       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
11212     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
11213     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11214   }
11215 
11216   // fold sext (not i1 X) -> add (zext i1 X), -1
11217   // TODO: This could be extended to handle bool vectors.
11218   if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
11219       (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
11220                             TLI.isOperationLegal(ISD::ADD, VT)))) {
11221     // If we can eliminate the 'not', the sext form should be better
11222     if (SDValue NewXor = visitXOR(N0.getNode())) {
11223       // Returning N0 is a form of in-visit replacement that may have
11224       // invalidated N0.
11225       if (NewXor.getNode() == N0.getNode()) {
11226         // Return SDValue here as the xor should have already been replaced in
11227         // this sext.
11228         return SDValue();
11229       } else {
11230         // Return a new sext with the new xor.
11231         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
11232       }
11233     }
11234 
11235     SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
11236     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11237   }
11238 
11239   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11240     return Res;
11241 
11242   return SDValue();
11243 }
11244 
// isTruncateOf - If N is a truncate of some other value, return true and
// record the value being truncated in Op and which of Op's bits are zero/one
// in Known.
11247 // This function computes KnownBits to avoid a duplicated call to
11248 // computeKnownBits in the caller.
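// For example (an illustrative sketch): (i1 (setcc X, 0, setne)), where every
// bit of X other than bit 0 is known zero, behaves exactly like
// (i1 (truncate X)), so Op is set to X.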
11249 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
11250                          KnownBits &Known) {
11251   if (N->getOpcode() == ISD::TRUNCATE) {
11252     Op = N->getOperand(0);
11253     Known = DAG.computeKnownBits(Op);
11254     return true;
11255   }
11256 
11257   if (N.getOpcode() != ISD::SETCC ||
11258       N.getValueType().getScalarType() != MVT::i1 ||
11259       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
11260     return false;
11261 
11262   SDValue Op0 = N->getOperand(0);
11263   SDValue Op1 = N->getOperand(1);
11264   assert(Op0.getValueType() == Op1.getValueType());
11265 
11266   if (isNullOrNullSplat(Op0))
11267     Op = Op1;
11268   else if (isNullOrNullSplat(Op1))
11269     Op = Op0;
11270   else
11271     return false;
11272 
11273   Known = DAG.computeKnownBits(Op);
11274 
11275   return (Known.Zero | 1).isAllOnes();
11276 }
11277 
11278 /// Given an extending node with a pop-count operand, if the target does not
11279 /// support a pop-count in the narrow source type but does support it in the
11280 /// destination type, widen the pop-count to the destination type.
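/// This is safe because widening the operand with zeros adds no set bits, so
/// the population count is unchanged, e.g. (an illustrative sketch):
///   (i64 (zext (i16 (ctpop X)))) -> (i64 (ctpop (zext X to i64)))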
11281 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
11282   assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
11283           Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
11284 
11285   SDValue CtPop = Extend->getOperand(0);
11286   if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
11287     return SDValue();
11288 
11289   EVT VT = Extend->getValueType(0);
11290   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11291   if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
11292       !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
11293     return SDValue();
11294 
11295   // zext (ctpop X) --> ctpop (zext X)
11296   SDLoc DL(Extend);
11297   SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
11298   return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
11299 }
11300 
11301 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
11302   SDValue N0 = N->getOperand(0);
11303   EVT VT = N->getValueType(0);
11304 
11305   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11306     return Res;
11307 
11308   // fold (zext (zext x)) -> (zext x)
11309   // fold (zext (aext x)) -> (zext x)
11310   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11311     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
11312                        N0.getOperand(0));
11313 
11314   // fold (zext (truncate x)) -> (zext x) or
11315   //      (zext (truncate x)) -> (truncate x)
11316   // This is valid when the truncated bits of x are already zero.
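  // For example (an illustrative sketch): if the top 16 bits of an i32 x are
  // known zero, then (i32 (zext (i16 (truncate x)))) is simply x.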
11317   SDValue Op;
11318   KnownBits Known;
11319   if (isTruncateOf(DAG, N0, Op, Known)) {
11320     APInt TruncatedBits =
11321       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
11322       APInt(Op.getScalarValueSizeInBits(), 0) :
11323       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
11324                         N0.getScalarValueSizeInBits(),
11325                         std::min(Op.getScalarValueSizeInBits(),
11326                                  VT.getScalarSizeInBits()));
11327     if (TruncatedBits.isSubsetOf(Known.Zero))
11328       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11329   }
11330 
11331   // fold (zext (truncate x)) -> (and x, mask)
11332   if (N0.getOpcode() == ISD::TRUNCATE) {
11333     // fold (zext (truncate (load x))) -> (zext (smaller load x))
11334     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
11335     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11336       SDNode *oye = N0.getOperand(0).getNode();
11337       if (NarrowLoad.getNode() != N0.getNode()) {
11338         CombineTo(N0.getNode(), NarrowLoad);
11339         // CombineTo deleted the truncate, if needed, but not what's under it.
11340         AddToWorklist(oye);
11341       }
11342       return SDValue(N, 0); // Return N so it doesn't get rechecked!
11343     }
11344 
11345     EVT SrcVT = N0.getOperand(0).getValueType();
11346     EVT MinVT = N0.getValueType();
11347 
    // Try to mask before the extension to avoid having to generate a larger
    // mask, possibly over several sub-vectors.
11350     if (SrcVT.bitsLT(VT) && VT.isVector()) {
11351       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
11352                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
11353         SDValue Op = N0.getOperand(0);
11354         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11355         AddToWorklist(Op.getNode());
11356         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11357         // Transfer the debug info; the new node is equivalent to N0.
11358         DAG.transferDbgValues(N0, ZExtOrTrunc);
11359         return ZExtOrTrunc;
11360       }
11361     }
11362 
11363     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
11364       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11365       AddToWorklist(Op.getNode());
11366       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11367       // We may safely transfer the debug info describing the truncate node over
11368       // to the equivalent and operation.
11369       DAG.transferDbgValues(N0, And);
11370       return And;
11371     }
11372   }
11373 
11374   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
11375   // if either of the casts is not free.
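  // For example (an illustrative sketch, with an i64 x):
  //   (i64 (zext (i32 (and (i32 (trunc x)), 255)))) -> (i64 (and x, 255))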
11376   if (N0.getOpcode() == ISD::AND &&
11377       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11378       N0.getOperand(1).getOpcode() == ISD::Constant &&
11379       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11380                            N0.getValueType()) ||
11381        !TLI.isZExtFree(N0.getValueType(), VT))) {
11382     SDValue X = N0.getOperand(0).getOperand(0);
11383     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
11384     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11385     SDLoc DL(N);
11386     return DAG.getNode(ISD::AND, DL, VT,
11387                        X, DAG.getConstant(Mask, DL, VT));
11388   }
11389 
11390   // Try to simplify (zext (load x)).
11391   if (SDValue foldedExt =
11392           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11393                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11394     return foldedExt;
11395 
11396   if (SDValue foldedExt =
11397       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
11398                                ISD::ZERO_EXTEND))
11399     return foldedExt;
11400 
11401   // fold (zext (load x)) to multiple smaller zextloads.
11402   // Only on illegal but splittable vectors.
11403   if (SDValue ExtLoad = CombineExtLoad(N))
11404     return ExtLoad;
11405 
11406   // fold (zext (and/or/xor (load x), cst)) ->
11407   //      (and/or/xor (zextload x), (zext cst))
11408   // Unless (and (load x) cst) will match as a zextload already and has
11409   // additional users.
11410   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11411        N0.getOpcode() == ISD::XOR) &&
11412       isa<LoadSDNode>(N0.getOperand(0)) &&
11413       N0.getOperand(1).getOpcode() == ISD::Constant &&
11414       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11415     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11416     EVT MemVT = LN00->getMemoryVT();
11417     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
11418         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
11419       bool DoXform = true;
11420       SmallVector<SDNode*, 4> SetCCs;
11421       if (!N0.hasOneUse()) {
11422         if (N0.getOpcode() == ISD::AND) {
11423           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
11424           EVT LoadResultTy = AndC->getValueType(0);
11425           EVT ExtVT;
11426           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
11427             DoXform = false;
11428         }
11429       }
11430       if (DoXform)
11431         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11432                                           ISD::ZERO_EXTEND, SetCCs, TLI);
11433       if (DoXform) {
11434         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
11435                                          LN00->getChain(), LN00->getBasePtr(),
11436                                          LN00->getMemoryVT(),
11437                                          LN00->getMemOperand());
11438         APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11439         SDLoc DL(N);
11440         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11441                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
11442         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11443         bool NoReplaceTruncAnd = !N0.hasOneUse();
11444         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11445         CombineTo(N, And);
11446         // If N0 has multiple uses, change other uses as well.
11447         if (NoReplaceTruncAnd) {
11448           SDValue TruncAnd =
11449               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11450           CombineTo(N0.getNode(), TruncAnd);
11451         }
11452         if (NoReplaceTrunc) {
11453           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11454         } else {
11455           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11456                                       LN00->getValueType(0), ExtLoad);
11457           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11458         }
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
11460       }
11461     }
11462   }
11463 
11464   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11465   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11466   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
11467     return ZExtLoad;
11468 
11469   // Try to simplify (zext (zextload x)).
11470   if (SDValue foldedExt = tryToFoldExtOfExtload(
11471           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
11472     return foldedExt;
11473 
11474   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11475     return V;
11476 
11477   if (N0.getOpcode() == ISD::SETCC) {
11478     // Only do this before legalize for now.
11479     if (!LegalOperations && VT.isVector() &&
11480         N0.getValueType().getVectorElementType() == MVT::i1) {
11481       EVT N00VT = N0.getOperand(0).getValueType();
11482       if (getSetCCResultType(N00VT) == N0.getValueType())
11483         return SDValue();
11484 
11485       // We know that the # elements of the results is the same as the #
11486       // elements of the compare (and the # elements of the compare result for
11487       // that matter). Check to see that they are the same size. If so, we know
11488       // that the element size of the sext'd result matches the element size of
11489       // the compare operands.
11490       SDLoc DL(N);
11491       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
11492         // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
11493         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
11494                                      N0.getOperand(1), N0.getOperand(2));
11495         return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
11496       }
11497 
11498       // If the desired elements are smaller or larger than the source
11499       // elements we can use a matching integer vector type and then
11500       // truncate/any extend followed by zext_in_reg.
11501       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11502       SDValue VsetCC =
11503           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
11504                       N0.getOperand(1), N0.getOperand(2));
11505       return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
11506                                     N0.getValueType());
11507     }
11508 
11509     // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
11510     SDLoc DL(N);
11511     EVT N0VT = N0.getValueType();
11512     EVT N00VT = N0.getOperand(0).getValueType();
11513     if (SDValue SCC = SimplifySelectCC(
11514             DL, N0.getOperand(0), N0.getOperand(1),
11515             DAG.getBoolConstant(true, DL, N0VT, N00VT),
11516             DAG.getBoolConstant(false, DL, N0VT, N00VT),
11517             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11518       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
11519   }
11520 
  // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
11522   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11523       isa<ConstantSDNode>(N0.getOperand(1)) &&
11524       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11525       N0.hasOneUse()) {
11526     SDValue ShAmt = N0.getOperand(1);
11527     if (N0.getOpcode() == ISD::SHL) {
11528       SDValue InnerZExt = N0.getOperand(0);
11529       // If the original shl may be shifting out bits, do not perform this
11530       // transformation.
11531       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
11532         InnerZExt.getOperand(0).getValueSizeInBits();
11533       if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
11534         return SDValue();
11535     }
11536 
11537     SDLoc DL(N);
11538 
11539     // Ensure that the shift amount is wide enough for the shifted value.
11540     if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
11541       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
11542 
11543     return DAG.getNode(N0.getOpcode(), DL, VT,
11544                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
11545                        ShAmt);
11546   }
11547 
11548   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11549     return NewVSel;
11550 
11551   if (SDValue NewCtPop = widenCtPop(N, DAG))
11552     return NewCtPop;
11553 
11554   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11555     return Res;
11556 
11557   return SDValue();
11558 }
11559 
11560 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
11561   SDValue N0 = N->getOperand(0);
11562   EVT VT = N->getValueType(0);
11563 
11564   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11565     return Res;
11566 
11567   // fold (aext (aext x)) -> (aext x)
11568   // fold (aext (zext x)) -> (zext x)
11569   // fold (aext (sext x)) -> (sext x)
11570   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
11571       N0.getOpcode() == ISD::ZERO_EXTEND ||
11572       N0.getOpcode() == ISD::SIGN_EXTEND)
11573     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11574 
11575   // fold (aext (truncate (load x))) -> (aext (smaller load x))
11576   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
11577   if (N0.getOpcode() == ISD::TRUNCATE) {
11578     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11579       SDNode *oye = N0.getOperand(0).getNode();
11580       if (NarrowLoad.getNode() != N0.getNode()) {
11581         CombineTo(N0.getNode(), NarrowLoad);
11582         // CombineTo deleted the truncate, if needed, but not what's under it.
11583         AddToWorklist(oye);
11584       }
11585       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11586     }
11587   }
11588 
11589   // fold (aext (truncate x))
11590   if (N0.getOpcode() == ISD::TRUNCATE)
11591     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11592 
11593   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
11594   // if the trunc is not free.
11595   if (N0.getOpcode() == ISD::AND &&
11596       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11597       N0.getOperand(1).getOpcode() == ISD::Constant &&
11598       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11599                           N0.getValueType())) {
11600     SDLoc DL(N);
11601     SDValue X = N0.getOperand(0).getOperand(0);
11602     X = DAG.getAnyExtOrTrunc(X, DL, VT);
11603     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11604     return DAG.getNode(ISD::AND, DL, VT,
11605                        X, DAG.getConstant(Mask, DL, VT));
11606   }
11607 
11608   // fold (aext (load x)) -> (aext (truncate (extload x)))
11609   // None of the supported targets knows how to perform load and any_ext
11610   // on vectors in one instruction, so attempt to fold to zext instead.
11611   if (VT.isVector()) {
11612     // Try to simplify (zext (load x)).
11613     if (SDValue foldedExt =
11614             tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11615                                ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11616       return foldedExt;
11617   } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
11618              ISD::isUNINDEXEDLoad(N0.getNode()) &&
11619              TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
11620     bool DoXform = true;
11621     SmallVector<SDNode *, 4> SetCCs;
11622     if (!N0.hasOneUse())
11623       DoXform =
11624           ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
11625     if (DoXform) {
11626       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11627       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
11628                                        LN0->getChain(), LN0->getBasePtr(),
11629                                        N0.getValueType(), LN0->getMemOperand());
11630       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
11631       // If the load value is used only by N, replace it via CombineTo N.
11632       bool NoReplaceTrunc = N0.hasOneUse();
11633       CombineTo(N, ExtLoad);
11634       if (NoReplaceTrunc) {
11635         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11636         recursivelyDeleteUnusedNodes(LN0);
11637       } else {
11638         SDValue Trunc =
11639             DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11640         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11641       }
11642       return SDValue(N, 0); // Return N so it doesn't get rechecked!
11643     }
11644   }
11645 
11646   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
11647   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
11648   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
11649   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
11650       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
11651     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11652     ISD::LoadExtType ExtType = LN0->getExtensionType();
11653     EVT MemVT = LN0->getMemoryVT();
11654     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
11655       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
11656                                        VT, LN0->getChain(), LN0->getBasePtr(),
11657                                        MemVT, LN0->getMemOperand());
11658       CombineTo(N, ExtLoad);
11659       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11660       recursivelyDeleteUnusedNodes(LN0);
11661       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11662     }
11663   }
11664 
11665   if (N0.getOpcode() == ISD::SETCC) {
11666     // For vectors:
11667     // aext(setcc) -> vsetcc
11668     // aext(setcc) -> truncate(vsetcc)
11669     // aext(setcc) -> aext(vsetcc)
11670     // Only do this before legalize for now.
11671     if (VT.isVector() && !LegalOperations) {
11672       EVT N00VT = N0.getOperand(0).getValueType();
11673       if (getSetCCResultType(N00VT) == N0.getValueType())
11674         return SDValue();
11675 
11676       // We know that the # elements of the results is the same as the
11677       // # elements of the compare (and the # elements of the compare result
11678       // for that matter).  Check to see that they are the same size.  If so,
11679       // we know that the element size of the sext'd result matches the
11680       // element size of the compare operands.
11681       if (VT.getSizeInBits() == N00VT.getSizeInBits())
11682         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
11683                              N0.getOperand(1),
11684                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
11685 
      // If the desired elements are smaller or larger than the source
      // elements, we can use a matching integer vector type and then
      // truncate/any-extend.
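      // Illustrative example (assumed types): for (aext v4i64 (setcc of
      // v4i32 operands)), we build a v4i32 vsetcc below and then any-extend
      // its result to v4i64.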
11689       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11690       SDValue VsetCC =
11691         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
11692                       N0.getOperand(1),
11693                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
11694       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
11695     }
11696 
11697     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
11698     SDLoc DL(N);
11699     if (SDValue SCC = SimplifySelectCC(
11700             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
11701             DAG.getConstant(0, DL, VT),
11702             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11703       return SCC;
11704   }
11705 
11706   if (SDValue NewCtPop = widenCtPop(N, DAG))
11707     return NewCtPop;
11708 
11709   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11710     return Res;
11711 
11712   return SDValue();
11713 }
11714 
11715 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
11716   unsigned Opcode = N->getOpcode();
11717   SDValue N0 = N->getOperand(0);
11718   SDValue N1 = N->getOperand(1);
11719   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
11720 
11721   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
11722   if (N0.getOpcode() == Opcode &&
11723       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
11724     return N0;
11725 
11726   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11727       N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by applying the smaller of the two asserted types to the larger source
    // value.
11730     // This eliminates the later assert:
11731     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
11732     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
11733     SDValue BigA = N0.getOperand(0);
11734     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11735     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11736            "Asserting zero/sign-extended bits to a type larger than the "
11737            "truncated destination does not provide information");
11738 
11739     SDLoc DL(N);
11740     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
11741     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
11742     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11743                                     BigA.getOperand(0), MinAssertVTVal);
11744     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11745   }
11746 
  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
  // than X, just move the AssertZext in front of the truncate and drop the
  // AssertSext.
11750   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11751       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
11752       Opcode == ISD::AssertZext) {
11753     SDValue BigA = N0.getOperand(0);
11754     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11755     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11756            "Asserting zero/sign-extended bits to a type larger than the "
11757            "truncated destination does not provide information");
11758 
11759     if (AssertVT.bitsLT(BigA_AssertVT)) {
11760       SDLoc DL(N);
11761       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11762                                       BigA.getOperand(0), N1);
11763       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11764     }
11765   }
11766 
11767   return SDValue();
11768 }
11769 
11770 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
11771   SDLoc DL(N);
11772 
11773   Align AL = cast<AssertAlignSDNode>(N)->getAlign();
11774   SDValue N0 = N->getOperand(0);
11775 
11776   // Fold (assertalign (assertalign x, AL0), AL1) ->
11777   // (assertalign x, max(AL0, AL1))
11778   if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
11779     return DAG.getAssertAlign(DL, N0.getOperand(0),
11780                               std::max(AL, AAN->getAlign()));
11781 
  // In rare cases, the source operands contain trivial arithmetic ops. Sink
  // this assert down to the source operands so that those arithmetic ops can
  // be exposed to DAG combining.
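  // Illustrative example (hypothetical operands): if y is known to be at
  // least AL-aligned, (assertalign (add x, y), AL) is rewritten below to
  // (add (assertalign x, AL), y), exposing x to further combining.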
11785   switch (N0.getOpcode()) {
11786   default:
11787     break;
11788   case ISD::ADD:
11789   case ISD::SUB: {
11790     unsigned AlignShift = Log2(AL);
11791     SDValue LHS = N0.getOperand(0);
11792     SDValue RHS = N0.getOperand(1);
11793     unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
11794     unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11795     if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
11796       if (LHSAlignShift < AlignShift)
11797         LHS = DAG.getAssertAlign(DL, LHS, AL);
11798       if (RHSAlignShift < AlignShift)
11799         RHS = DAG.getAssertAlign(DL, RHS, AL);
11800       return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
11801     }
11802     break;
11803   }
11804   }
11805 
11806   return SDValue();
11807 }
11808 
/// If the result of a wider load is shifted right by N bits and then truncated
/// to a narrower type, where N is a multiple of the number of bits of the
/// narrower type, transform it to a narrower load from address + N / (number
/// of bits of the new type). Also narrow the load if the result is masked with
/// an AND to effectively produce a smaller type. If the result is to be
/// extended, also fold the extension to form an extending load.
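///
/// For example (a sketch, assuming a little-endian target where the narrow
/// load is legal): (i16 (trunc (srl (i32 (load p)), 16))) can be narrowed to
/// (i16 (load p+2)).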
11815 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
11816   unsigned Opc = N->getOpcode();
11817 
11818   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
11819   SDValue N0 = N->getOperand(0);
11820   EVT VT = N->getValueType(0);
11821   EVT ExtVT = VT;
11822 
11823   // This transformation isn't valid for vector loads.
11824   if (VT.isVector())
11825     return SDValue();
11826 
11827   unsigned ShAmt = 0;
11828   bool HasShiftedOffset = false;
11829   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
11830   // extended to VT.
11831   if (Opc == ISD::SIGN_EXTEND_INREG) {
11832     ExtType = ISD::SEXTLOAD;
11833     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11834   } else if (Opc == ISD::SRL) {
    // Another special case: SRL is basically zero-extending a narrower value,
    // or it may be shifting a higher subword, half or byte into the lowest
    // bits.
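    // Illustrative example (assumed types, little endian):
    // (srl (i32 (zextload i16, p)), 8) can become (i32 (zextload i8, p+1)),
    // loading only the high byte of the original i16 value.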
11838     ExtType = ISD::ZEXTLOAD;
11839     N0 = SDValue(N, 0);
11840 
11841     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
11842     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11843     if (!N01 || !LN0)
11844       return SDValue();
11845 
11846     uint64_t ShiftAmt = N01->getZExtValue();
11847     uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
11848     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
11849       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
11850     else
11851       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
11852                                 VT.getScalarSizeInBits() - ShiftAmt);
11853   } else if (Opc == ISD::AND) {
11854     // An AND with a constant mask is the same as a truncate + zero-extend.
11855     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
11856     if (!AndC)
11857       return SDValue();
11858 
11859     const APInt &Mask = AndC->getAPIntValue();
11860     unsigned ActiveBits = 0;
11861     if (Mask.isMask()) {
11862       ActiveBits = Mask.countTrailingOnes();
11863     } else if (Mask.isShiftedMask()) {
11864       ShAmt = Mask.countTrailingZeros();
11865       APInt ShiftedMask = Mask.lshr(ShAmt);
11866       ActiveBits = ShiftedMask.countTrailingOnes();
11867       HasShiftedOffset = true;
11868     } else
11869       return SDValue();
11870 
11871     ExtType = ISD::ZEXTLOAD;
11872     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
11873   }
11874 
11875   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
11876     SDValue SRL = N0;
11877     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
11878       ShAmt = ConstShift->getZExtValue();
11879       unsigned EVTBits = ExtVT.getScalarSizeInBits();
      // Is the shift amount a multiple of the size of ExtVT?
11881       if ((ShAmt & (EVTBits-1)) == 0) {
11882         N0 = N0.getOperand(0);
        // Is the load width a multiple of the size of ExtVT?
11884         if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
11885           return SDValue();
11886       }
11887 
11888       // At this point, we must have a load or else we can't do the transform.
11889       auto *LN0 = dyn_cast<LoadSDNode>(N0);
11890       if (!LN0) return SDValue();
11891 
11892       // Because a SRL must be assumed to *need* to zero-extend the high bits
11893       // (as opposed to anyext the high bits), we can't combine the zextload
11894       // lowering of SRL and an sextload.
11895       if (LN0->getExtensionType() == ISD::SEXTLOAD)
11896         return SDValue();
11897 
11898       // If the shift amount is larger than the input type then we're not
11899       // accessing any of the loaded bytes.  If the load was a zextload/extload
11900       // then the result of the shift+trunc is zero/undef (handled elsewhere).
11901       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
11902         return SDValue();
11903 
11904       // If the SRL is only used by a masking AND, we may be able to adjust
11905       // the ExtVT to make the AND redundant.
11906       SDNode *Mask = *(SRL->use_begin());
11907       if (Mask->getOpcode() == ISD::AND &&
11908           isa<ConstantSDNode>(Mask->getOperand(1))) {
11909         const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
11910         if (ShiftMask.isMask()) {
11911           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
11912                                            ShiftMask.countTrailingOnes());
11913           // If the mask is smaller, recompute the type.
11914           if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
11915               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
11916             ExtVT = MaskedVT;
11917         }
11918       }
11919     }
11920   }
11921 
11922   // If the load is shifted left (and the result isn't shifted back right),
11923   // we can fold the truncate through the shift.
11924   unsigned ShLeftAmt = 0;
11925   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
11926       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
11927     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
11928       ShLeftAmt = N01->getZExtValue();
11929       N0 = N0.getOperand(0);
11930     }
11931   }
11932 
11933   // If we haven't found a load, we can't narrow it.
11934   if (!isa<LoadSDNode>(N0))
11935     return SDValue();
11936 
11937   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11938   // Reducing the width of a volatile load is illegal.  For atomics, we may be
11939   // able to reduce the width provided we never widen again. (see D66309)
11940   if (!LN0->isSimple() ||
11941       !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
11942     return SDValue();
11943 
11944   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
11945     unsigned LVTStoreBits =
11946         LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
11947     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
11948     return LVTStoreBits - EVTStoreBits - ShAmt;
11949   };
11950 
11951   // For big endian targets, we need to adjust the offset to the pointer to
11952   // load the correct bytes.
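  // Illustrative example (assumed types): when narrowing an i32 load to i8,
  // a logical shift amount of 0 selects the least significant byte, which
  // big endian stores last, so the adjusted amount is 32 - 8 - 0 = 24 bits
  // (byte offset 3 below).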
11953   if (DAG.getDataLayout().isBigEndian())
11954     ShAmt = AdjustBigEndianShift(ShAmt);
11955 
11956   uint64_t PtrOff = ShAmt / 8;
11957   Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
11958   SDLoc DL(LN0);
11959   // The original load itself didn't wrap, so an offset within it doesn't.
11960   SDNodeFlags Flags;
11961   Flags.setNoUnsignedWrap(true);
11962   SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
11963                                             TypeSize::Fixed(PtrOff), DL, Flags);
11964   AddToWorklist(NewPtr.getNode());
11965 
11966   SDValue Load;
11967   if (ExtType == ISD::NON_EXTLOAD)
11968     Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
11969                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11970                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11971   else
11972     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
11973                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
11974                           NewAlign, LN0->getMemOperand()->getFlags(),
11975                           LN0->getAAInfo());
11976 
11977   // Replace the old load's chain with the new load's chain.
11978   WorklistRemover DeadNodes(*this);
11979   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11980 
11981   // Shift the result left, if we've swallowed a left shift.
11982   SDValue Result = Load;
11983   if (ShLeftAmt != 0) {
11984     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
11985     if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
11986       ShImmTy = VT;
11987     // If the shift amount is as large as the result size (but, presumably,
11988     // no larger than the source) then the useful bits of the result are
11989     // zero; we can't simply return the shortened shift, because the result
11990     // of that operation is undefined.
11991     if (ShLeftAmt >= VT.getScalarSizeInBits())
11992       Result = DAG.getConstant(0, DL, VT);
11993     else
11994       Result = DAG.getNode(ISD::SHL, DL, VT,
11995                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
11996   }
11997 
11998   if (HasShiftedOffset) {
    // On big-endian targets the shift amount was adjusted above to compute
    // the pointer offset; adjusting it again recovers the original amount.
12001     if (DAG.getDataLayout().isBigEndian())
12002       ShAmt = AdjustBigEndianShift(ShAmt);
12003 
    // We're using a shifted mask, so the load now has an offset. This means
    // that data has been loaded into lower bytes than it would have been
    // before, so we need to shl the loaded data into the correct position in
    // the register.
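    // Illustrative example (assumed types, little endian): for
    // (and (i32 (load p)), 0x00FF00), the narrowed load reads the i8 at
    // p + 1 and is shifted back left by ShAmt (8) bits here.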
12008     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
12009     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
12010     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
12011   }
12012 
12013   // Return the new loaded value.
12014   return Result;
12015 }
12016 
12017 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
12018   SDValue N0 = N->getOperand(0);
12019   SDValue N1 = N->getOperand(1);
12020   EVT VT = N->getValueType(0);
12021   EVT ExtVT = cast<VTSDNode>(N1)->getVT();
12022   unsigned VTBits = VT.getScalarSizeInBits();
12023   unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
12024 
  // sext_in_reg(undef) = 0 because the top bits will all be the same.
12026   if (N0.isUndef())
12027     return DAG.getConstant(0, SDLoc(N), VT);
12028 
12029   // fold (sext_in_reg c1) -> c1
12030   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
12031     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
12032 
12033   // If the input is already sign extended, just drop the extension.
12034   if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
12035     return N0;
12036 
12037   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
12038   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
12039       ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
12040     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
12041                        N1);
12042 
12043   // fold (sext_in_reg (sext x)) -> (sext x)
12044   // fold (sext_in_reg (aext x)) -> (sext x)
12045   // if x is small enough or if we know that x has more than 1 sign bit and the
12046   // sign_extend_inreg is extending from one of them.
12047   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
12048     SDValue N00 = N0.getOperand(0);
12049     unsigned N00Bits = N00.getScalarValueSizeInBits();
12050     if ((N00Bits <= ExtVTBits ||
12051          (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
12052         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12053       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12054   }
12055 
12056   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
12057   // if x is small enough or if we know that x has more than 1 sign bit and the
12058   // sign_extend_inreg is extending from one of them.
12059   if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
12060       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
12061       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
12062     SDValue N00 = N0.getOperand(0);
12063     unsigned N00Bits = N00.getScalarValueSizeInBits();
12064     unsigned DstElts = N0.getValueType().getVectorMinNumElements();
12065     unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
12066     bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
12067     APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
12068     if ((N00Bits == ExtVTBits ||
12069          (!IsZext && (N00Bits < ExtVTBits ||
12070                       (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
12071                           ExtVTBits))) &&
12072         (!LegalOperations ||
12073          TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
12074       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
12075   }
12076 
12077   // fold (sext_in_reg (zext x)) -> (sext x)
12078   // iff we are extending the source sign bit.
12079   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
12080     SDValue N00 = N0.getOperand(0);
12081     if (N00.getScalarValueSizeInBits() == ExtVTBits &&
12082         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12084   }
12085 
12086   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
12087   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
12088     return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
12089 
12090   // fold operands of sext_in_reg based on knowledge that the top bits are not
12091   // demanded.
12092   if (SimplifyDemandedBits(SDValue(N, 0)))
12093     return SDValue(N, 0);
12094 
12095   // fold (sext_in_reg (load x)) -> (smaller sextload x)
12096   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
12097   if (SDValue NarrowLoad = ReduceLoadWidth(N))
12098     return NarrowLoad;
12099 
12100   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
12101   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
12102   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
12103   if (N0.getOpcode() == ISD::SRL) {
12104     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
12105       if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
12106         // We can turn this into an SRA iff the input to the SRL is already sign
12107         // extended enough.
12108         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
12109         if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
12110           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
12111                              N0.getOperand(1));
12112       }
12113   }
12114 
12115   // fold (sext_inreg (extload x)) -> (sextload x)
  // If sextload is not supported by the target, we can only do the combine
  // when the load has one use. Doing otherwise can block folding the extload
  // with other extends that the target does support.
12119   if (ISD::isEXTLoad(N0.getNode()) &&
12120       ISD::isUNINDEXEDLoad(N0.getNode()) &&
12121       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12122       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
12123         N0.hasOneUse()) ||
12124        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12125     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12126     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12127                                      LN0->getChain(),
12128                                      LN0->getBasePtr(), ExtVT,
12129                                      LN0->getMemOperand());
12130     CombineTo(N, ExtLoad);
12131     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12132     AddToWorklist(ExtLoad.getNode());
12133     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12134   }
12135 
12136   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
12137   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
12138       N0.hasOneUse() &&
12139       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12140       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
12141        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12142     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12143     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12144                                      LN0->getChain(),
12145                                      LN0->getBasePtr(), ExtVT,
12146                                      LN0->getMemOperand());
12147     CombineTo(N, ExtLoad);
12148     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12149     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12150   }
12151 
12152   // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
12153   // ignore it if the masked load is already sign extended
12154   if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
12155     if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
12156         Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
12157         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
12158       SDValue ExtMaskedLoad = DAG.getMaskedLoad(
12159           VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
12160           Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
12161           Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
12162       CombineTo(N, ExtMaskedLoad);
12163       CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
12164       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12165     }
12166   }
12167 
12168   // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
12169   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
12170     if (SDValue(GN0, 0).hasOneUse() &&
12171         ExtVT == GN0->getMemoryVT() &&
        TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
12173       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
12174                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
12175 
12176       SDValue ExtLoad = DAG.getMaskedGather(
12177           DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
12178           GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
12179 
12180       CombineTo(N, ExtLoad);
12181       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12182       AddToWorklist(ExtLoad.getNode());
12183       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12184     }
12185   }
12186 
12187   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
12188   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
12189     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
12190                                            N0.getOperand(1), false))
12191       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
12192   }
12193 
12194   return SDValue();
12195 }
12196 
12197 SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
12198   SDValue N0 = N->getOperand(0);
12199   EVT VT = N->getValueType(0);
12200 
12201   // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
12202   if (N0.isUndef())
12203     return DAG.getConstant(0, SDLoc(N), VT);
12204 
12205   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12206     return Res;
12207 
12208   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12209     return SDValue(N, 0);
12210 
12211   return SDValue();
12212 }
12213 
12214 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
12215   SDValue N0 = N->getOperand(0);
12216   EVT VT = N->getValueType(0);
12217   EVT SrcVT = N0.getValueType();
12218   bool isLE = DAG.getDataLayout().isLittleEndian();
12219 
12220   // noop truncate
12221   if (SrcVT == VT)
12222     return N0;
12223 
12224   // fold (truncate (truncate x)) -> (truncate x)
12225   if (N0.getOpcode() == ISD::TRUNCATE)
12226     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12227 
12228   // fold (truncate c1) -> c1
12229   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
12230     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
12231     if (C.getNode() != N)
12232       return C;
12233   }
12234 
12235   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
12236   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
12237       N0.getOpcode() == ISD::SIGN_EXTEND ||
12238       N0.getOpcode() == ISD::ANY_EXTEND) {
12239     // if the source is smaller than the dest, we still need an extend.
12240     if (N0.getOperand(0).getValueType().bitsLT(VT))
12241       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, then we just need the truncate.
12243     if (N0.getOperand(0).getValueType().bitsGT(VT))
12244       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12245     // if the source and dest are the same type, we can drop both the extend
12246     // and the truncate.
12247     return N0.getOperand(0);
12248   }
12249 
12250   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
12251   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
12252     return SDValue();
12253 
12254   // Fold extract-and-trunc into a narrow extract. For example:
12255   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
12256   //   i32 y = TRUNCATE(i64 x)
12257   //        -- becomes --
12258   //   v16i8 b = BITCAST (v2i64 val)
12259   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
12260   //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization, after which we
  // need to be more careful about the vector instructions that we generate.
12264   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12265       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
12266     EVT VecTy = N0.getOperand(0).getValueType();
12267     EVT ExTy = N0.getValueType();
12268     EVT TrTy = N->getValueType(0);
12269 
12270     auto EltCnt = VecTy.getVectorElementCount();
12271     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
12272     auto NewEltCnt = EltCnt * SizeRatio;
12273 
12274     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
12275     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
12276 
12277     SDValue EltNo = N0->getOperand(1);
12278     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
12279       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12280       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
12281 
12282       SDLoc DL(N);
12283       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
12284                          DAG.getBitcast(NVT, N0.getOperand(0)),
12285                          DAG.getVectorIdxConstant(Index, DL));
12286     }
12287   }
12288 
12289   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
12290   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12291     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12292         TLI.isTruncateFree(SrcVT, VT)) {
12293       SDLoc SL(N0);
12294       SDValue Cond = N0.getOperand(0);
12295       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12296       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
12297       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
12298     }
12299   }
12300 
  // trunc (shl x, K) -> shl (trunc x), K, if K < VT.getScalarSizeInBits()
12302   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12303       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
12304       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
12305     SDValue Amt = N0.getOperand(1);
12306     KnownBits Known = DAG.computeKnownBits(Amt);
12307     unsigned Size = VT.getScalarSizeInBits();
12308     if (Known.countMaxActiveBits() <= Log2_32(Size)) {
12309       SDLoc SL(N);
12310       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
12311 
12312       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12313       if (AmtVT != Amt.getValueType()) {
12314         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
12315         AddToWorklist(Amt.getNode());
12316       }
12317       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
12318     }
12319   }
12320 
12321   if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
12322     return V;
12323 
12324   // Attempt to pre-truncate BUILD_VECTOR sources.
12325   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
12326       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
12327       // Avoid creating illegal types if running after type legalizer.
12328       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
12329     SDLoc DL(N);
12330     EVT SVT = VT.getScalarType();
12331     SmallVector<SDValue, 8> TruncOps;
12332     for (const SDValue &Op : N0->op_values()) {
12333       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
12334       TruncOps.push_back(TruncOp);
12335     }
12336     return DAG.getBuildVector(VT, DL, TruncOps);
12337   }
12338 
12339   // Fold a series of buildvector, bitcast, and truncate if possible.
12340   // For example fold
12341   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
12342   //   (2xi32 (buildvector x, y)).
12343   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
12344       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12345       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
12346       N0.getOperand(0).hasOneUse()) {
12347     SDValue BuildVect = N0.getOperand(0);
12348     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
12349     EVT TruncVecEltTy = VT.getVectorElementType();
12350 
12351     // Check that the element types match.
12352     if (BuildVectEltTy == TruncVecEltTy) {
12353       // Now we only need to compute the offset of the truncated elements.
12354       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
12355       unsigned TruncVecNumElts = VT.getVectorNumElements();
12356       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
12357 
12358       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
12359              "Invalid number of elements");
12360 
12361       SmallVector<SDValue, 8> Opnds;
12362       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
12363         Opnds.push_back(BuildVect.getOperand(i));
12364 
12365       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
12366     }
12367   }
12368 
12369   // See if we can simplify the input to this truncate through knowledge that
12370   // only the low bits are being used.
12371   // For example "trunc (or (shl x, 8), y)" // -> trunc y
12372   // Currently we only perform this optimization on scalars because vectors
12373   // may have different active low bits.
12374   if (!VT.isVector()) {
12375     APInt Mask =
12376         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
12377     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
12378       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
12379   }
12380 
12381   // fold (truncate (load x)) -> (smaller load x)
12382   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
12383   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
12384     if (SDValue Reduced = ReduceLoadWidth(N))
12385       return Reduced;
12386 
12387     // Handle the case where the load remains an extending load even
12388     // after truncation.
12389     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
12390       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12391       if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
12392         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
12393                                          VT, LN0->getChain(), LN0->getBasePtr(),
12394                                          LN0->getMemoryVT(),
12395                                          LN0->getMemOperand());
12396         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
12397         return NewLoad;
12398       }
12399     }
12400   }
12401 
  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
12403   // where ... are all 'undef'.
12404   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
12405     SmallVector<EVT, 8> VTs;
12406     SDValue V;
12407     unsigned Idx = 0;
12408     unsigned NumDefs = 0;
12409 
12410     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
12411       SDValue X = N0.getOperand(i);
12412       if (!X.isUndef()) {
12413         V = X;
12414         Idx = i;
12415         NumDefs++;
12416       }
      // Stop if more than one member is non-undef.
12418       if (NumDefs > 1)
12419         break;
12420 
12421       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
12422                                      VT.getVectorElementType(),
12423                                      X.getValueType().getVectorElementCount()));
12424     }
12425 
12426     if (NumDefs == 0)
12427       return DAG.getUNDEF(VT);
12428 
12429     if (NumDefs == 1) {
12430       assert(V.getNode() && "The single defined operand is empty!");
12431       SmallVector<SDValue, 8> Opnds;
12432       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
12433         if (i != Idx) {
12434           Opnds.push_back(DAG.getUNDEF(VTs[i]));
12435           continue;
12436         }
12437         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
12438         AddToWorklist(NV.getNode());
12439         Opnds.push_back(NV);
12440       }
12441       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
12442     }
12443   }
12444 
12445   // Fold truncate of a bitcast of a vector to an extract of the low vector
12446   // element.
12447   //
12448   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
12449   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
12450     SDValue VecSrc = N0.getOperand(0);
12451     EVT VecSrcVT = VecSrc.getValueType();
12452     if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
12453         (!LegalOperations ||
12454          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
12455       SDLoc SL(N);
12456 
12457       unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
12458       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
12459                          DAG.getVectorIdxConstant(Idx, SL));
12460     }
12461   }
12462 
12463   // Simplify the operands using demanded-bits information.
12464   if (SimplifyDemandedBits(SDValue(N, 0)))
12465     return SDValue(N, 0);
12466 
12467   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
12468   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
12469   // When the adde's carry is not used.
12470   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
12471       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      // We only do this for addcarry before operation legalization.
12473       ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
12474        TLI.isOperationLegal(N0.getOpcode(), VT))) {
12475     SDLoc SL(N);
12476     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12477     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12478     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
12479     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
12480   }
12481 
12482   // fold (truncate (extract_subvector(ext x))) ->
12483   //      (extract_subvector x)
12484   // TODO: This can be generalized to cover cases where the truncate and extract
12485   // do not fully cancel each other out.
12486   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
12487     SDValue N00 = N0.getOperand(0);
12488     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
12489         N00.getOpcode() == ISD::ZERO_EXTEND ||
12490         N00.getOpcode() == ISD::ANY_EXTEND) {
12491       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
12492           VT.getVectorElementType())
12493         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
12494                            N00.getOperand(0), N0.getOperand(1));
12495     }
12496   }
12497 
12498   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12499     return NewVSel;
12500 
12501   // Narrow a suitable binary operation with a non-opaque constant operand by
12502   // moving it ahead of the truncate. This is limited to pre-legalization
12503   // because targets may prefer a wider type during later combines and invert
12504   // this transform.
12505   switch (N0.getOpcode()) {
12506   case ISD::ADD:
12507   case ISD::SUB:
12508   case ISD::MUL:
12509   case ISD::AND:
12510   case ISD::OR:
12511   case ISD::XOR:
12512     if (!LegalOperations && N0.hasOneUse() &&
12513         (isConstantOrConstantVector(N0.getOperand(0), true) ||
12514          isConstantOrConstantVector(N0.getOperand(1), true))) {
12515       // TODO: We already restricted this to pre-legalization, but for vectors
12516       // we are extra cautious to not create an unsupported operation.
12517       // Target-specific changes are likely needed to avoid regressions here.
12518       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
12519         SDLoc DL(N);
12520         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12521         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12522         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12523       }
12524     }
12525     break;
12526   case ISD::USUBSAT:
    // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
    // enough to know that the upper bits are zero, we must also ensure that
    // we don't introduce an extra truncate.
12530     if (!LegalOperations && N0.hasOneUse() &&
12531         N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12532         N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
12533             VT.getScalarSizeInBits() &&
12534         hasOperation(N0.getOpcode(), VT)) {
12535       return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
12536                                  DAG, SDLoc(N));
12537     }
12538     break;
12539   }
12540 
12541   return SDValue();
12542 }
12543 
12544 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
12545   SDValue Elt = N->getOperand(i);
12546   if (Elt.getOpcode() != ISD::MERGE_VALUES)
12547     return Elt.getNode();
12548   return Elt.getOperand(Elt.getResNo()).getNode();
12549 }
12550 
12551 /// build_pair (load, load) -> load
12552 /// if load locations are consecutive.
12553 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
12554   assert(N->getOpcode() == ISD::BUILD_PAIR);
12555 
12556   auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
12557   auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
12558 
  // A BUILD_PAIR always has the least significant part in elt 0 and the most
  // significant part in elt 1, so when combining into one large load we need
  // to consider the endianness.
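  // Illustrative example (assumed layout): on a big-endian target,
  // (build_pair (i32 (load p+4)), (i32 (load p))) forming an i64 keeps its
  // least significant half at the higher address, so the swap below makes
  // LD1 the load at the lower address (p), which becomes the combined base.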
12562   if (DAG.getDataLayout().isBigEndian())
12563     std::swap(LD1, LD2);
12564 
12565   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
12566       !LD1->hasOneUse() || !LD2->hasOneUse() ||
12567       LD1->getAddressSpace() != LD2->getAddressSpace())
12568     return SDValue();
12569 
12570   bool LD1Fast = false;
12571   EVT LD1VT = LD1->getValueType(0);
12572   unsigned LD1Bytes = LD1VT.getStoreSize();
12573   if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
12574       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
12575       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
12576                              *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
12577     return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
12578                        LD1->getPointerInfo(), LD1->getAlign());
12579 
12580   return SDValue();
12581 }
12582 
12583 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
12584   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
12585   // and Lo parts; on big-endian machines it doesn't.
12586   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
12587 }
12588 
12589 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
12590                                     const TargetLowering &TLI) {
12591   // If this is not a bitcast to an FP type or if the target doesn't have
12592   // IEEE754-compliant FP logic, we're done.
12593   EVT VT = N->getValueType(0);
12594   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
12595     return SDValue();
12596 
12597   // TODO: Handle cases where the integer constant is a different scalar
12598   // bitwidth to the FP.
12599   SDValue N0 = N->getOperand(0);
12600   EVT SourceVT = N0.getValueType();
12601   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
12602     return SDValue();
12603 
12604   unsigned FPOpcode;
12605   APInt SignMask;
12606   switch (N0.getOpcode()) {
12607   case ISD::AND:
12608     FPOpcode = ISD::FABS;
12609     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
12610     break;
12611   case ISD::XOR:
12612     FPOpcode = ISD::FNEG;
12613     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12614     break;
12615   case ISD::OR:
12616     FPOpcode = ISD::FABS;
12617     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12618     break;
12619   default:
12620     return SDValue();
12621   }
12622 
12623   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
12624   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
12625   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
12626   //   fneg (fabs X)
12627   SDValue LogicOp0 = N0.getOperand(0);
12628   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
12629   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
12630       LogicOp0.getOpcode() == ISD::BITCAST &&
12631       LogicOp0.getOperand(0).getValueType() == VT) {
12632     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
12633     NumFPLogicOpsConv++;
12634     if (N0.getOpcode() == ISD::OR)
12635       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
12636     return FPOp;
12637   }
12638 
12639   return SDValue();
12640 }
12641 
12642 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
12643   SDValue N0 = N->getOperand(0);
12644   EVT VT = N->getValueType(0);
12645 
12646   if (N0.isUndef())
12647     return DAG.getUNDEF(VT);
12648 
12649   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
12650   // Only do this before legalize types, unless both types are integer and the
  // scalar type is legal. Only do this before legalize ops, since the target
  // may be depending on the bitcast.
12653   // First check to see if this is all constant.
12654   // TODO: Support FP bitcasts after legalize types.
12655   if (VT.isVector() &&
12656       (!LegalTypes ||
12657        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
12658         TLI.isTypeLegal(VT.getVectorElementType()))) &&
12659       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
12660       cast<BuildVectorSDNode>(N0)->isConstant())
12661     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
12662                                              VT.getVectorElementType());
12663 
12664   // If the input is a constant, let getNode fold it.
12665   if (isIntOrFPConstant(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // an fp -> int or int -> fp conversion and that the resulting operation
    // will be legal.
12669     if (!LegalOperations ||
12670         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
12671          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
12672         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
12673          TLI.isOperationLegal(ISD::Constant, VT))) {
12674       SDValue C = DAG.getBitcast(VT, N0);
12675       if (C.getNode() != N)
12676         return C;
12677     }
12678   }
12679 
12680   // (conv (conv x, t1), t2) -> (conv x, t2)
12681   if (N0.getOpcode() == ISD::BITCAST)
12682     return DAG.getBitcast(VT, N0.getOperand(0));
12683 
12684   // fold (conv (load x)) -> (load (conv*)x)
12685   // If the resultant load doesn't need a higher alignment than the original!
12686   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12687       // Do not remove the cast if the types differ in endian layout.
12688       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
12689           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
12690       // If the load is volatile, we only want to change the load type if the
12691       // resulting load is legal. Otherwise we might increase the number of
12692       // memory accesses. We don't care if the original type was legal or not
12693       // as we assume software couldn't rely on the number of accesses of an
12694       // illegal type.
12695       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
12696        TLI.isOperationLegal(ISD::LOAD, VT))) {
12697     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12698 
12699     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
12700                                     *LN0->getMemOperand())) {
12701       SDValue Load =
12702           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
12703                       LN0->getPointerInfo(), LN0->getAlign(),
12704                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12705       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12706       return Load;
12707     }
12708   }
12709 
12710   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
12711     return V;
12712 
12713   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12714   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12715   //
12716   // For ppc_fp128:
12717   // fold (bitcast (fneg x)) ->
12718   //     flipbit = signbit
12719   //     (xor (bitcast x) (build_pair flipbit, flipbit))
12720   //
12721   // fold (bitcast (fabs x)) ->
12722   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
12723   //     (xor (bitcast x) (build_pair flipbit, flipbit))
12724   // This often reduces constant pool loads.
12725   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
12726        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
12727       N0.getNode()->hasOneUse() && VT.isInteger() &&
12728       !VT.isVector() && !N0.getValueType().isVector()) {
12729     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
12730     AddToWorklist(NewConv.getNode());
12731 
12732     SDLoc DL(N);
12733     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12734       assert(VT.getSizeInBits() == 128);
12735       SDValue SignBit = DAG.getConstant(
12736           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
12737       SDValue FlipBit;
12738       if (N0.getOpcode() == ISD::FNEG) {
12739         FlipBit = SignBit;
12740         AddToWorklist(FlipBit.getNode());
12741       } else {
12742         assert(N0.getOpcode() == ISD::FABS);
12743         SDValue Hi =
12744             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
12745                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12746                                               SDLoc(NewConv)));
12747         AddToWorklist(Hi.getNode());
12748         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
12749         AddToWorklist(FlipBit.getNode());
12750       }
12751       SDValue FlipBits =
12752           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12753       AddToWorklist(FlipBits.getNode());
12754       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
12755     }
12756     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12757     if (N0.getOpcode() == ISD::FNEG)
12758       return DAG.getNode(ISD::XOR, DL, VT,
12759                          NewConv, DAG.getConstant(SignBit, DL, VT));
12760     assert(N0.getOpcode() == ISD::FABS);
12761     return DAG.getNode(ISD::AND, DL, VT,
12762                        NewConv, DAG.getConstant(~SignBit, DL, VT));
12763   }
12764 
12765   // fold (bitconvert (fcopysign cst, x)) ->
12766   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
12767   // Note that we don't handle (copysign x, cst) because this can always be
12768   // folded to an fneg or fabs.
12769   //
12770   // For ppc_fp128:
12771   // fold (bitcast (fcopysign cst, x)) ->
12772   //     flipbit = (and (extract_element
12773   //                     (xor (bitcast cst), (bitcast x)), 0),
12774   //                    signbit)
12775   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
12776   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
12777       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
12778       VT.isInteger() && !VT.isVector()) {
12779     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
12780     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
12781     if (isTypeLegal(IntXVT)) {
12782       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
12783       AddToWorklist(X.getNode());
12784 
12785       // If X has a different width than the result/lhs, sext it or truncate it.
12786       unsigned VTWidth = VT.getSizeInBits();
12787       if (OrigXWidth < VTWidth) {
12788         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
12789         AddToWorklist(X.getNode());
12790       } else if (OrigXWidth > VTWidth) {
12791         // To get the sign bit in the right place, we have to shift it right
12792         // before truncating.
12793         SDLoc DL(X);
12794         X = DAG.getNode(ISD::SRL, DL,
12795                         X.getValueType(), X,
12796                         DAG.getConstant(OrigXWidth-VTWidth, DL,
12797                                         X.getValueType()));
12798         AddToWorklist(X.getNode());
12799         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
12800         AddToWorklist(X.getNode());
12801       }
12802 
12803       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12804         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
12805         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12806         AddToWorklist(Cst.getNode());
12807         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
12808         AddToWorklist(X.getNode());
12809         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
12810         AddToWorklist(XorResult.getNode());
12811         SDValue XorResult64 = DAG.getNode(
12812             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
12813             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12814                                   SDLoc(XorResult)));
12815         AddToWorklist(XorResult64.getNode());
12816         SDValue FlipBit =
12817             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
12818                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
12819         AddToWorklist(FlipBit.getNode());
12820         SDValue FlipBits =
12821             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12822         AddToWorklist(FlipBits.getNode());
12823         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
12824       }
12825       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12826       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
12827                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
12828       AddToWorklist(X.getNode());
12829 
12830       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12831       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
12832                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
12833       AddToWorklist(Cst.getNode());
12834 
12835       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
12836     }
12837   }
12838 
12839   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
12840   if (N0.getOpcode() == ISD::BUILD_PAIR)
12841     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
12842       return CombineLD;
12843 
12844   // Remove double bitcasts from shuffles - this is often a legacy of
12845   // XformToShuffleWithZero being used to combine bitmaskings (of
12846   // float vectors bitcast to integer vectors) into shuffles.
12847   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
12848   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
12849       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
12850       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
12851       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
12852     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
12853 
12854     // If operands are a bitcast, peek through if it casts the original VT.
12855     // If operands are a constant, just bitcast back to original VT.
12856     auto PeekThroughBitcast = [&](SDValue Op) {
12857       if (Op.getOpcode() == ISD::BITCAST &&
12858           Op.getOperand(0).getValueType() == VT)
        return Op.getOperand(0);
12860       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
12861           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
12862         return DAG.getBitcast(VT, Op);
12863       return SDValue();
12864     };
12865 
12866     // FIXME: If either input vector is bitcast, try to convert the shuffle to
12867     // the result type of this bitcast. This would eliminate at least one
12868     // bitcast. See the transform in InstCombine.
12869     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
12870     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
12871     if (!(SV0 && SV1))
12872       return SDValue();
12873 
12874     int MaskScale =
12875         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
12876     SmallVector<int, 8> NewMask;
12877     for (int M : SVN->getMask())
12878       for (int i = 0; i != MaskScale; ++i)
12879         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
12880 
12881     SDValue LegalShuffle =
12882         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
12883     if (LegalShuffle)
12884       return LegalShuffle;
12885   }
12886 
12887   return SDValue();
12888 }
12889 
12890 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
12891   EVT VT = N->getValueType(0);
12892   return CombineConsecutiveLoads(N, VT);
12893 }
12894 
12895 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
12896   SDValue N0 = N->getOperand(0);
12897 
12898   if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
12899     return N0;
12900 
12901   return SDValue();
12902 }
12903 
12904 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
12905 /// operands. DstEltVT indicates the destination element value type.
12906 SDValue DAGCombiner::
12907 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
12908   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
12909 
12910   // If this is already the right type, we're done.
12911   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
12912 
12913   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
12914   unsigned DstBitSize = DstEltVT.getSizeInBits();
12915 
12916   // If this is a conversion of N elements of one type to N elements of another
12917   // type, convert each element.  This handles FP<->INT cases.
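  // For example, v2f32 -> v2i32 simply bitcasts each f32 element to an i32.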
12918   if (SrcBitSize == DstBitSize) {
12919     SmallVector<SDValue, 8> Ops;
12920     for (SDValue Op : BV->op_values()) {
12921       // If the vector element type is not legal, the BUILD_VECTOR operands
12922       // are promoted and implicitly truncated.  Make that explicit here.
12923       if (Op.getValueType() != SrcEltVT)
12924         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
12925       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
12926       AddToWorklist(Ops.back().getNode());
12927     }
12928     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12929                               BV->getValueType(0).getVectorNumElements());
12930     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
12931   }
12932 
12933   // Otherwise, we're growing or shrinking the elements.  To avoid having to
12934   // handle annoying details of growing/shrinking FP values, we convert them to
12935   // int first.
12936   if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to an int vector whose elements are the
    // same size.
12939     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
12940     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
12941     SrcEltVT = IntVT;
12942   }
12943 
12944   // Now we know the input is an integer vector.  If the output is a FP type,
12945   // convert to integer first, then to FP of the right size.
12946   if (DstEltVT.isFloatingPoint()) {
12947     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
12948     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
12949 
12950     // Next, convert to FP elements of the same size.
12951     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
12952   }
12953 
12954   SDLoc DL(BV);
12955 
  // Okay, we know the src/dst types are both integers of differing sizes.
  // Handle growing first.
12958   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
12959   if (SrcBitSize < DstBitSize) {
12960     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
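    // For example, v4i16 -> v2i32 on a little-endian target: output element 0
    // becomes (in[1] << 16) | in[0], since later source elements land in the
    // higher bits of the wider integer.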
12961 
    bool isLE = DAG.getDataLayout().isLittleEndian();
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
12966       APInt NewBits = APInt(DstBitSize, 0);
12967       bool EltIsUndef = true;
12968       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
12969         // Shift the previously computed bits over.
12970         NewBits <<= SrcBitSize;
        SDValue Op = BV->getOperand(i + (isLE ? (NumInputsPerOutput-j-1) : j));
12972         if (Op.isUndef()) continue;
12973         EltIsUndef = false;
12974 
12975         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
12976                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
12977       }
12978 
12979       if (EltIsUndef)
12980         Ops.push_back(DAG.getUNDEF(DstEltVT));
12981       else
12982         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
12983     }
12984 
12985     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
12986     return DAG.getBuildVector(VT, DL, Ops);
12987   }
12988 
12989   // Finally, this must be the case where we are shrinking elements: each input
12990   // turns into multiple outputs.
12991   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
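  // For example, v2i32 -> v4i16 splits each i32 into its low i16 followed by
  // its high i16; the pieces are reversed below for big-endian targets.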
12992   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12993                             NumOutputsPerInput*BV->getNumOperands());
12994   SmallVector<SDValue, 8> Ops;
12995 
12996   for (const SDValue &Op : BV->op_values()) {
12997     if (Op.isUndef()) {
12998       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
12999       continue;
13000     }
13001 
13002     APInt OpVal = cast<ConstantSDNode>(Op)->
13003                   getAPIntValue().zextOrTrunc(SrcBitSize);
13004 
13005     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
13006       APInt ThisVal = OpVal.trunc(DstBitSize);
13007       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
13008       OpVal.lshrInPlace(DstBitSize);
13009     }
13010 
13011     // For big endian targets, swap the order of the pieces of each element.
13012     if (DAG.getDataLayout().isBigEndian())
13013       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
13014   }
13015 
13016   return DAG.getBuildVector(VT, DL, Ops);
13017 }
13018 
// Returns true if floating-point contraction is allowed on the FMUL SDValue
// `N`.
13021 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
13022   assert(N.getOpcode() == ISD::FMUL);
13023 
13024   return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
13025          N->getFlags().hasAllowContract();
13026 }
13027 
// Return true if `N` can be assumed to have no infinities involved in its
// computation.
13029 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
13030   return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
13031 }
13032 
13033 /// Try to perform FMA combining on a given FADD node.
13034 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
13035   SDValue N0 = N->getOperand(0);
13036   SDValue N1 = N->getOperand(1);
13037   EVT VT = N->getValueType(0);
13038   SDLoc SL(N);
13039 
13040   const TargetOptions &Options = DAG.getTarget().Options;
13041 
13042   // Floating-point multiply-add with intermediate rounding.
13043   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13044 
13045   // Floating-point multiply-add without intermediate rounding.
13046   bool HasFMA =
13047       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13048       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13049 
13050   // No valid opcode, do not combine.
13051   if (!HasFMAD && !HasFMA)
13052     return SDValue();
13053 
13054   bool CanReassociate =
13055       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13056   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13057                               Options.UnsafeFPMath || HasFMAD);
13058   // If the addition is not contractable, do not combine.
13059   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13060     return SDValue();
13061 
13062   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13063     return SDValue();
13064 
13065   // Always prefer FMAD to FMA for precision.
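  // (FMAD keeps the intermediate rounding step of the separate fmul and fadd,
  // so it reproduces the unfused result exactly.)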
13066   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13067   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13068 
13069   auto isFusedOp = [&](SDValue N) {
13070     unsigned Opcode = N.getOpcode();
13071     return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13072   };
13073 
13074   // Is the node an FMUL and contractable either due to global flags or
13075   // SDNodeFlags.
13076   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13077     if (N.getOpcode() != ISD::FMUL)
13078       return false;
13079     return AllowFusionGlobally || N->getFlags().hasAllowContract();
13080   };
13081   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
13082   // prefer to fold the multiply with fewer uses.
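  // (A multiply with fewer uses is more likely to become dead after fusing.)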
13083   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
13084     if (N0.getNode()->use_size() > N1.getNode()->use_size())
13085       std::swap(N0, N1);
13086   }
13087 
13088   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
13089   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
13090     return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13091                        N0.getOperand(1), N1);
13092   }
13093 
13094   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
13095   // Note: Commutes FADD operands.
13096   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
13097     return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
13098                        N1.getOperand(1), N0);
13099   }
13100 
13101   // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
13102   // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
13103   // This requires reassociation because it changes the order of operations.
13104   SDValue FMA, E;
13105   if (CanReassociate && isFusedOp(N0) &&
13106       N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
13107       N0.getOperand(2).hasOneUse()) {
13108     FMA = N0;
13109     E = N1;
13110   } else if (CanReassociate && isFusedOp(N1) &&
13111              N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
13112              N1.getOperand(2).hasOneUse()) {
13113     FMA = N1;
13114     E = N0;
13115   }
13116   if (FMA && E) {
13117     SDValue A = FMA.getOperand(0);
13118     SDValue B = FMA.getOperand(1);
13119     SDValue C = FMA.getOperand(2).getOperand(0);
13120     SDValue D = FMA.getOperand(2).getOperand(1);
13121     SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
13122     return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
13123   }
13124 
13125   // Look through FP_EXTEND nodes to do more combining.
13126 
13127   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
13128   if (N0.getOpcode() == ISD::FP_EXTEND) {
13129     SDValue N00 = N0.getOperand(0);
13130     if (isContractableFMUL(N00) &&
13131         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13132                             N00.getValueType())) {
13133       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13134                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13135                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13136                          N1);
13137     }
13138   }
13139 
13140   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
13141   // Note: Commutes FADD operands.
13142   if (N1.getOpcode() == ISD::FP_EXTEND) {
13143     SDValue N10 = N1.getOperand(0);
13144     if (isContractableFMUL(N10) &&
13145         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13146                             N10.getValueType())) {
13147       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13148                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
13149                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
13150                          N0);
13151     }
13152   }
13153 
13154   // More folding opportunities when target permits.
13155   if (Aggressive) {
13156     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
13157     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
13158     auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13159                                     SDValue Z) {
13160       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
13161                          DAG.getNode(PreferredFusedOpcode, SL, VT,
13162                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13163                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
13164                                      Z));
13165     };
13166     if (isFusedOp(N0)) {
13167       SDValue N02 = N0.getOperand(2);
13168       if (N02.getOpcode() == ISD::FP_EXTEND) {
13169         SDValue N020 = N02.getOperand(0);
13170         if (isContractableFMUL(N020) &&
13171             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13172                                 N020.getValueType())) {
13173           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
13174                                       N020.getOperand(0), N020.getOperand(1),
13175                                       N1);
13176         }
13177       }
13178     }
13179 
13180     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
13181     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
13182     // FIXME: This turns two single-precision and one double-precision
13183     // operation into two double-precision operations, which might not be
13184     // interesting for all targets, especially GPUs.
13185     auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13186                                     SDValue Z) {
13187       return DAG.getNode(
13188           PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
13189           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
13190           DAG.getNode(PreferredFusedOpcode, SL, VT,
13191                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13192                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
13193     };
13194     if (N0.getOpcode() == ISD::FP_EXTEND) {
13195       SDValue N00 = N0.getOperand(0);
13196       if (isFusedOp(N00)) {
13197         SDValue N002 = N00.getOperand(2);
13198         if (isContractableFMUL(N002) &&
13199             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13200                                 N00.getValueType())) {
13201           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
13202                                       N002.getOperand(0), N002.getOperand(1),
13203                                       N1);
13204         }
13205       }
13206     }
13207 
    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
13209     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
13210     if (isFusedOp(N1)) {
13211       SDValue N12 = N1.getOperand(2);
13212       if (N12.getOpcode() == ISD::FP_EXTEND) {
13213         SDValue N120 = N12.getOperand(0);
13214         if (isContractableFMUL(N120) &&
13215             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13216                                 N120.getValueType())) {
13217           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
13218                                       N120.getOperand(0), N120.getOperand(1),
13219                                       N0);
13220         }
13221       }
13222     }
13223 
    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
13225     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
13226     // FIXME: This turns two single-precision and one double-precision
13227     // operation into two double-precision operations, which might not be
13228     // interesting for all targets, especially GPUs.
13229     if (N1.getOpcode() == ISD::FP_EXTEND) {
13230       SDValue N10 = N1.getOperand(0);
13231       if (isFusedOp(N10)) {
13232         SDValue N102 = N10.getOperand(2);
13233         if (isContractableFMUL(N102) &&
13234             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13235                                 N10.getValueType())) {
13236           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
13237                                       N102.getOperand(0), N102.getOperand(1),
13238                                       N0);
13239         }
13240       }
13241     }
13242   }
13243 
13244   return SDValue();
13245 }
13246 
13247 /// Try to perform FMA combining on a given FSUB node.
13248 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
13249   SDValue N0 = N->getOperand(0);
13250   SDValue N1 = N->getOperand(1);
13251   EVT VT = N->getValueType(0);
13252   SDLoc SL(N);
13253 
13254   const TargetOptions &Options = DAG.getTarget().Options;
13255   // Floating-point multiply-add with intermediate rounding.
13256   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13257 
13258   // Floating-point multiply-add without intermediate rounding.
13259   bool HasFMA =
13260       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13261       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13262 
13263   // No valid opcode, do not combine.
13264   if (!HasFMAD && !HasFMA)
13265     return SDValue();
13266 
13267   const SDNodeFlags Flags = N->getFlags();
13268   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13269                               Options.UnsafeFPMath || HasFMAD);
13270 
13271   // If the subtraction is not contractable, do not combine.
13272   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13273     return SDValue();
13274 
13275   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13276     return SDValue();
13277 
13278   // Always prefer FMAD to FMA for precision.
13279   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13280   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13281   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
13282 
13283   // Is the node an FMUL and contractable either due to global flags or
13284   // SDNodeFlags.
13285   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13286     if (N.getOpcode() != ISD::FMUL)
13287       return false;
13288     return AllowFusionGlobally || N->getFlags().hasAllowContract();
13289   };
13290 
13291   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13292   auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
13293     if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
13294       return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
13295                          XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
13296     }
13297     return SDValue();
13298   };
13299 
13300   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13301   // Note: Commutes FSUB operands.
13302   auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
13303     if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
13304       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13305                          DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
13306                          YZ.getOperand(1), X);
13307     }
13308     return SDValue();
13309   };
13310 
13311   // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
13312   // prefer to fold the multiply with fewer uses.
13313   if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
13314       (N0.getNode()->use_size() > N1.getNode()->use_size())) {
13315     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
13316     if (SDValue V = tryToFoldXSubYZ(N0, N1))
13317       return V;
13318     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
13319     if (SDValue V = tryToFoldXYSubZ(N0, N1))
13320       return V;
13321   } else {
13322     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13323     if (SDValue V = tryToFoldXYSubZ(N0, N1))
13324       return V;
13325     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13326     if (SDValue V = tryToFoldXSubYZ(N0, N1))
13327       return V;
13328   }
13329 
  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
13331   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
13332       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
13333     SDValue N00 = N0.getOperand(0).getOperand(0);
13334     SDValue N01 = N0.getOperand(0).getOperand(1);
13335     return DAG.getNode(PreferredFusedOpcode, SL, VT,
13336                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
13337                        DAG.getNode(ISD::FNEG, SL, VT, N1));
13338   }
13339 
13340   // Look through FP_EXTEND nodes to do more combining.
13341 
13342   // fold (fsub (fpext (fmul x, y)), z)
13343   //   -> (fma (fpext x), (fpext y), (fneg z))
13344   if (N0.getOpcode() == ISD::FP_EXTEND) {
13345     SDValue N00 = N0.getOperand(0);
13346     if (isContractableFMUL(N00) &&
13347         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13348                             N00.getValueType())) {
13349       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13350                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13351                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13352                          DAG.getNode(ISD::FNEG, SL, VT, N1));
13353     }
13354   }
13355 
13356   // fold (fsub x, (fpext (fmul y, z)))
13357   //   -> (fma (fneg (fpext y)), (fpext z), x)
13358   // Note: Commutes FSUB operands.
13359   if (N1.getOpcode() == ISD::FP_EXTEND) {
13360     SDValue N10 = N1.getOperand(0);
13361     if (isContractableFMUL(N10) &&
13362         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13363                             N10.getValueType())) {
13364       return DAG.getNode(
13365           PreferredFusedOpcode, SL, VT,
13366           DAG.getNode(ISD::FNEG, SL, VT,
13367                       DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
13368           DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
13369     }
13370   }
13371 
  // fold (fsub (fpext (fneg (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // us from implementing the canonicalization in visitFSUB.
13378   if (N0.getOpcode() == ISD::FP_EXTEND) {
13379     SDValue N00 = N0.getOperand(0);
13380     if (N00.getOpcode() == ISD::FNEG) {
13381       SDValue N000 = N00.getOperand(0);
13382       if (isContractableFMUL(N000) &&
13383           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13384                               N00.getValueType())) {
13385         return DAG.getNode(
13386             ISD::FNEG, SL, VT,
13387             DAG.getNode(PreferredFusedOpcode, SL, VT,
13388                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13389                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13390                         N1));
13391       }
13392     }
13393   }
13394 
  // fold (fsub (fneg (fpext (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // us from implementing the canonicalization in visitFSUB.
13401   if (N0.getOpcode() == ISD::FNEG) {
13402     SDValue N00 = N0.getOperand(0);
13403     if (N00.getOpcode() == ISD::FP_EXTEND) {
13404       SDValue N000 = N00.getOperand(0);
13405       if (isContractableFMUL(N000) &&
13406           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13407                               N000.getValueType())) {
13408         return DAG.getNode(
13409             ISD::FNEG, SL, VT,
13410             DAG.getNode(PreferredFusedOpcode, SL, VT,
13411                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13412                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13413                         N1));
13414       }
13415     }
13416   }
13417 
13418   auto isReassociable = [Options](SDNode *N) {
13419     return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13420   };
13421 
13422   auto isContractableAndReassociableFMUL = [isContractableFMUL,
13423                                             isReassociable](SDValue N) {
13424     return isContractableFMUL(N) && isReassociable(N.getNode());
13425   };
13426 
13427   auto isFusedOp = [&](SDValue N) {
13428     unsigned Opcode = N.getOpcode();
13429     return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13430   };
13431 
13432   // More folding opportunities when target permits.
13433   if (Aggressive && isReassociable(N)) {
13434     bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
13435     // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
13437     if (CanFuse && isFusedOp(N0) &&
13438         isContractableAndReassociableFMUL(N0.getOperand(2)) &&
13439         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
13440       return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13441                          N0.getOperand(1),
13442                          DAG.getNode(PreferredFusedOpcode, SL, VT,
13443                                      N0.getOperand(2).getOperand(0),
13444                                      N0.getOperand(2).getOperand(1),
13445                                      DAG.getNode(ISD::FNEG, SL, VT, N1)));
13446     }
13447 
13448     // fold (fsub x, (fma y, z, (fmul u, v)))
13449     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
13450     if (CanFuse && isFusedOp(N1) &&
13451         isContractableAndReassociableFMUL(N1.getOperand(2)) &&
13452         N1->hasOneUse() && NoSignedZero) {
13453       SDValue N20 = N1.getOperand(2).getOperand(0);
13454       SDValue N21 = N1.getOperand(2).getOperand(1);
13455       return DAG.getNode(
13456           PreferredFusedOpcode, SL, VT,
13457           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13458           DAG.getNode(PreferredFusedOpcode, SL, VT,
13459                       DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
13460     }
13461 
13462     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
13464     if (isFusedOp(N0) && N0->hasOneUse()) {
13465       SDValue N02 = N0.getOperand(2);
13466       if (N02.getOpcode() == ISD::FP_EXTEND) {
13467         SDValue N020 = N02.getOperand(0);
13468         if (isContractableAndReassociableFMUL(N020) &&
13469             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13470                                 N020.getValueType())) {
13471           return DAG.getNode(
13472               PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
13473               DAG.getNode(
13474                   PreferredFusedOpcode, SL, VT,
13475                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
13476                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
13477                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
13478         }
13479       }
13480     }
13481 
13482     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
13483     //   -> (fma (fpext x), (fpext y),
13484     //           (fma (fpext u), (fpext v), (fneg z)))
13485     // FIXME: This turns two single-precision and one double-precision
13486     // operation into two double-precision operations, which might not be
13487     // interesting for all targets, especially GPUs.
13488     if (N0.getOpcode() == ISD::FP_EXTEND) {
13489       SDValue N00 = N0.getOperand(0);
13490       if (isFusedOp(N00)) {
13491         SDValue N002 = N00.getOperand(2);
13492         if (isContractableAndReassociableFMUL(N002) &&
13493             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13494                                 N00.getValueType())) {
13495           return DAG.getNode(
13496               PreferredFusedOpcode, SL, VT,
13497               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13498               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13499               DAG.getNode(
13500                   PreferredFusedOpcode, SL, VT,
13501                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
13502                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
13503                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
13504         }
13505       }
13506     }
13507 
13508     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
13509     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
13510     if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
13511         N1->hasOneUse()) {
13512       SDValue N120 = N1.getOperand(2).getOperand(0);
13513       if (isContractableAndReassociableFMUL(N120) &&
13514           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13515                               N120.getValueType())) {
13516         SDValue N1200 = N120.getOperand(0);
13517         SDValue N1201 = N120.getOperand(1);
13518         return DAG.getNode(
13519             PreferredFusedOpcode, SL, VT,
13520             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13521             DAG.getNode(PreferredFusedOpcode, SL, VT,
13522                         DAG.getNode(ISD::FNEG, SL, VT,
13523                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
13524                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
13525       }
13526     }
13527 
13528     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
13529     //   -> (fma (fneg (fpext y)), (fpext z),
13530     //           (fma (fneg (fpext u)), (fpext v), x))
13531     // FIXME: This turns two single-precision and one double-precision
13532     // operation into two double-precision operations, which might not be
13533     // interesting for all targets, especially GPUs.
13534     if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
13535       SDValue CvtSrc = N1.getOperand(0);
13536       SDValue N100 = CvtSrc.getOperand(0);
13537       SDValue N101 = CvtSrc.getOperand(1);
13538       SDValue N102 = CvtSrc.getOperand(2);
13539       if (isContractableAndReassociableFMUL(N102) &&
13540           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13541                               CvtSrc.getValueType())) {
13542         SDValue N1020 = N102.getOperand(0);
13543         SDValue N1021 = N102.getOperand(1);
13544         return DAG.getNode(
13545             PreferredFusedOpcode, SL, VT,
13546             DAG.getNode(ISD::FNEG, SL, VT,
13547                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
13548             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
13549             DAG.getNode(PreferredFusedOpcode, SL, VT,
13550                         DAG.getNode(ISD::FNEG, SL, VT,
13551                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
13552                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
13553       }
13554     }
13555   }
13556 
13557   return SDValue();
13558 }
13559 
13560 /// Try to perform FMA combining on a given FMUL node based on the distributive
13561 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
13562 /// subtraction instead of addition).
13563 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
13564   SDValue N0 = N->getOperand(0);
13565   SDValue N1 = N->getOperand(1);
13566   EVT VT = N->getValueType(0);
13567   SDLoc SL(N);
13568 
13569   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
13570 
13571   const TargetOptions &Options = DAG.getTarget().Options;
13572 
13573   // The transforms below are incorrect when x == 0 and y == inf, because the
13574   // intermediate multiplication produces a nan.
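  // For example, (fmul (fadd 0.0, 1.0), inf) is inf, but the fused form
  // (fma 0.0, inf, inf) computes 0.0 * inf == nan.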
13575   SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
13576   if (!hasNoInfs(Options, FAdd))
13577     return SDValue();
13578 
13579   // Floating-point multiply-add without intermediate rounding.
13580   bool HasFMA =
13581       isContractableFMUL(Options, SDValue(N, 0)) &&
13582       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13583       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13584 
13585   // Floating-point multiply-add with intermediate rounding. This can result
13586   // in a less precise result due to the changed rounding order.
13587   bool HasFMAD = Options.UnsafeFPMath &&
13588                  (LegalOperations && TLI.isFMADLegal(DAG, N));
13589 
13590   // No valid opcode, do not combine.
13591   if (!HasFMAD && !HasFMA)
13592     return SDValue();
13593 
13594   // Always prefer FMAD to FMA for precision.
13595   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13596   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13597 
13598   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
13599   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
13600   auto FuseFADD = [&](SDValue X, SDValue Y) {
13601     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
13602       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
13603         if (C->isExactlyValue(+1.0))
13604           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13605                              Y);
13606         if (C->isExactlyValue(-1.0))
13607           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13608                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13609       }
13610     }
13611     return SDValue();
13612   };
13613 
13614   if (SDValue FMA = FuseFADD(N0, N1))
13615     return FMA;
13616   if (SDValue FMA = FuseFADD(N1, N0))
13617     return FMA;
13618 
13619   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
13620   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
13621   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
13622   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
13623   auto FuseFSUB = [&](SDValue X, SDValue Y) {
13624     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
13625       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
13626         if (C0->isExactlyValue(+1.0))
13627           return DAG.getNode(PreferredFusedOpcode, SL, VT,
13628                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13629                              Y);
13630         if (C0->isExactlyValue(-1.0))
13631           return DAG.getNode(PreferredFusedOpcode, SL, VT,
13632                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13633                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13634       }
13635       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
13636         if (C1->isExactlyValue(+1.0))
13637           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13638                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13639         if (C1->isExactlyValue(-1.0))
13640           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13641                              Y);
13642       }
13643     }
13644     return SDValue();
13645   };
13646 
13647   if (SDValue FMA = FuseFSUB(N0, N1))
13648     return FMA;
13649   if (SDValue FMA = FuseFSUB(N1, N0))
13650     return FMA;
13651 
13652   return SDValue();
13653 }
13654 
13655 SDValue DAGCombiner::visitFADD(SDNode *N) {
13656   SDValue N0 = N->getOperand(0);
13657   SDValue N1 = N->getOperand(1);
13658   bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
13659   bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
13660   EVT VT = N->getValueType(0);
13661   SDLoc DL(N);
13662   const TargetOptions &Options = DAG.getTarget().Options;
13663   SDNodeFlags Flags = N->getFlags();
13664   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13665 
13666   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13667     return R;
13668 
13669   // fold vector ops
13670   if (VT.isVector())
13671     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13672       return FoldedVOp;
13673 
13674   // fold (fadd c1, c2) -> c1 + c2
13675   if (N0CFP && N1CFP)
13676     return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
13677 
13678   // canonicalize constant to RHS
13679   if (N0CFP && !N1CFP)
13680     return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
13681 
13682   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
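  // (The +0.0 case needs nsz because (-0.0) + (+0.0) == +0.0, not -0.0.)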
13683   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
13684   if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.NoSignedZerosFPMath ||
        Flags.hasNoSignedZeros())
13686       return N0;
13687 
13688   if (SDValue NewSel = foldBinOpIntoSelect(N))
13689     return NewSel;
13690 
13691   // fold (fadd A, (fneg B)) -> (fsub A, B)
13692   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13693     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13694             N1, DAG, LegalOperations, ForCodeSize))
13695       return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
13696 
13697   // fold (fadd (fneg A), B) -> (fsub B, A)
13698   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13699     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13700             N0, DAG, LegalOperations, ForCodeSize))
13701       return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
13702 
13703   auto isFMulNegTwo = [](SDValue FMul) {
13704     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
13705       return false;
13706     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
13707     return C && C->isExactlyValue(-2.0);
13708   };
13709 
13710   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
13711   if (isFMulNegTwo(N0)) {
13712     SDValue B = N0.getOperand(0);
13713     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13714     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
13715   }
13716   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
13717   if (isFMulNegTwo(N1)) {
13718     SDValue B = N1.getOperand(0);
13719     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13720     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
13721   }
13722 
  // No FP constant should be created after legalization, as the Instruction
  // Selection pass has a hard time dealing with FP constants.
13725   bool AllowNewConst = (Level < AfterLegalizeDAG);
13726 
13727   // If nnan is enabled, fold lots of things.
13728   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
13729     // If allowed, fold (fadd (fneg x), x) -> 0.0
13730     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
13731       return DAG.getConstantFP(0.0, DL, VT);
13732 
13733     // If allowed, fold (fadd x, (fneg x)) -> 0.0
13734     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
13735       return DAG.getConstantFP(0.0, DL, VT);
13736   }
13737 
  // If 'unsafe math' is enabled, or both the reassoc and nsz flags are set,
  // fold lots of things.
  // TODO: break out the portions of the transformations below that only need
  //       Unsafe and do not require both nsz and reassoc
13741   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13742        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13743       AllowNewConst) {
13744     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
13745     if (N1CFP && N0.getOpcode() == ISD::FADD &&
13746         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13747       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
13748       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
13749     }
13750 
13751     // We can fold chains of FADD's of the same value into multiplications.
13752     // This transform is not safe in general because we are reducing the number
13753     // of rounding steps.
13754     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
13755       if (N0.getOpcode() == ISD::FMUL) {
13756         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13757         bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
13758 
13759         // (fadd (fmul x, c), x) -> (fmul x, c+1)
13760         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
13761           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13762                                        DAG.getConstantFP(1.0, DL, VT));
13763           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
13764         }
13765 
13766         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
13767         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
13768             N1.getOperand(0) == N1.getOperand(1) &&
13769             N0.getOperand(0) == N1.getOperand(0)) {
13770           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13771                                        DAG.getConstantFP(2.0, DL, VT));
13772           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
13773         }
13774       }
13775 
13776       if (N1.getOpcode() == ISD::FMUL) {
13777         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13778         bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
13779 
13780         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
13781         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
13782           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13783                                        DAG.getConstantFP(1.0, DL, VT));
13784           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
13785         }
13786 
13787         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
13788         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
13789             N0.getOperand(0) == N0.getOperand(1) &&
13790             N1.getOperand(0) == N0.getOperand(0)) {
13791           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13792                                        DAG.getConstantFP(2.0, DL, VT));
13793           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
13794         }
13795       }
13796 
13797       if (N0.getOpcode() == ISD::FADD) {
13798         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13799         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
13800         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
13801             (N0.getOperand(0) == N1)) {
13802           return DAG.getNode(ISD::FMUL, DL, VT, N1,
13803                              DAG.getConstantFP(3.0, DL, VT));
13804         }
13805       }
13806 
13807       if (N1.getOpcode() == ISD::FADD) {
13808         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13809         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
13810         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
13811             N1.getOperand(0) == N0) {
13812           return DAG.getNode(ISD::FMUL, DL, VT, N0,
13813                              DAG.getConstantFP(3.0, DL, VT));
13814         }
13815       }
13816 
13817       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
13818       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
13819           N0.getOperand(0) == N0.getOperand(1) &&
13820           N1.getOperand(0) == N1.getOperand(1) &&
13821           N0.getOperand(0) == N1.getOperand(0)) {
13822         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
13823                            DAG.getConstantFP(4.0, DL, VT));
13824       }
13825     }
13826   } // enable-unsafe-fp-math
13827 
13828   // FADD -> FMA combines:
13829   if (SDValue Fused = visitFADDForFMACombine(N)) {
13830     AddToWorklist(Fused.getNode());
13831     return Fused;
13832   }
13833   return SDValue();
13834 }
13835 
13836 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
13837   SDValue Chain = N->getOperand(0);
13838   SDValue N0 = N->getOperand(1);
13839   SDValue N1 = N->getOperand(2);
13840   EVT VT = N->getValueType(0);
13841   EVT ChainVT = N->getValueType(1);
13842   SDLoc DL(N);
13843   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13844 
13845   // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
13846   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13847     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13848             N1, DAG, LegalOperations, ForCodeSize)) {
13849       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13850                          {Chain, N0, NegN1});
13851     }
13852 
13853   // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
13854   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13855     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13856             N0, DAG, LegalOperations, ForCodeSize)) {
13857       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13858                          {Chain, N1, NegN0});
13859     }
13860   return SDValue();
13861 }
13862 
13863 SDValue DAGCombiner::visitFSUB(SDNode *N) {
13864   SDValue N0 = N->getOperand(0);
13865   SDValue N1 = N->getOperand(1);
13866   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13867   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13868   EVT VT = N->getValueType(0);
13869   SDLoc DL(N);
13870   const TargetOptions &Options = DAG.getTarget().Options;
13871   const SDNodeFlags Flags = N->getFlags();
13872   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13873 
13874   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13875     return R;
13876 
13877   // fold vector ops
13878   if (VT.isVector())
13879     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13880       return FoldedVOp;
13881 
13882   // fold (fsub c1, c2) -> c1-c2
13883   if (N0CFP && N1CFP)
13884     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
13885 
13886   if (SDValue NewSel = foldBinOpIntoSelect(N))
13887     return NewSel;
13888 
  // (fsub A, +0.0) -> A (also allowed with -0.0 and nsz)
13890   if (N1CFP && N1CFP->isZero()) {
13891     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
13892         Flags.hasNoSignedZeros()) {
13893       return N0;
13894     }
13895   }
13896 
13897   if (N0 == N1) {
13898     // (fsub x, x) -> 0.0
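    // (Requires nnan: if x is nan or infinity, x - x is nan, not 0.0.)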
13899     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
13900       return DAG.getConstantFP(0.0f, DL, VT);
13901   }
13902 
  // (fsub -0.0, N1) -> -N1 (also allowed with +0.0 and nsz)
13904   if (N0CFP && N0CFP->isZero()) {
13905     if (N0CFP->isNegative() ||
13906         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
13907       // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
13908       // flushed to zero, unless all users treat denorms as zero (DAZ).
13909       // FIXME: This transform will change the sign of a NaN and the behavior
13910       // of a signaling NaN. It is only valid when a NoNaN flag is present.
13911       DenormalMode DenormMode = DAG.getDenormalMode(VT);
13912       if (DenormMode == DenormalMode::getIEEE()) {
13913         if (SDValue NegN1 =
13914                 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13915           return NegN1;
13916         if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13917           return DAG.getNode(ISD::FNEG, DL, VT, N1);
13918       }
13919     }
13920   }
13921 
13922   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13923        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13924       N1.getOpcode() == ISD::FADD) {
13925     // X - (X + Y) -> -Y
13926     if (N0 == N1->getOperand(0))
13927       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
13928     // X - (Y + X) -> -Y
13929     if (N0 == N1->getOperand(1))
13930       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
13931   }
13932 
13933   // fold (fsub A, (fneg B)) -> (fadd A, B)
13934   if (SDValue NegN1 =
13935           TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13936     return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
13937 
13938   // FSUB -> FMA combines:
13939   if (SDValue Fused = visitFSUBForFMACombine(N)) {
13940     AddToWorklist(Fused.getNode());
13941     return Fused;
13942   }
13943 
13944   return SDValue();
13945 }
13946 
13947 SDValue DAGCombiner::visitFMUL(SDNode *N) {
13948   SDValue N0 = N->getOperand(0);
13949   SDValue N1 = N->getOperand(1);
13950   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13951   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13952   EVT VT = N->getValueType(0);
13953   SDLoc DL(N);
13954   const TargetOptions &Options = DAG.getTarget().Options;
13955   const SDNodeFlags Flags = N->getFlags();
13956   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13957 
13958   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13959     return R;
13960 
13961   // fold vector ops
13962   if (VT.isVector()) {
13963     // This just handles C1 * C2 for vectors. Other vector folds are below.
13964     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13965       return FoldedVOp;
13966   }
13967 
13968   // fold (fmul c1, c2) -> c1*c2
13969   if (N0CFP && N1CFP)
13970     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
13971 
13972   // canonicalize constant to RHS
13973   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
13974      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
13975     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
13976 
13977   if (SDValue NewSel = foldBinOpIntoSelect(N))
13978     return NewSel;
13979 
13980   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
13981     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
13982     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
13983         N0.getOpcode() == ISD::FMUL) {
13984       SDValue N00 = N0.getOperand(0);
13985       SDValue N01 = N0.getOperand(1);
13986       // Avoid an infinite loop by making sure that N00 is not a constant
13987       // (the inner multiply has not been constant folded yet).
13988       if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
13989           !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
13990         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
13991         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
13992       }
13993     }
13994 
    // Match a special case: X * 2.0 gets canonicalized to (fadd X, X) below,
    // so fold (fmul (fadd X, X), C) -> (fmul X, 2.0 * C)
13997     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
13998         N0.getOperand(0) == N0.getOperand(1)) {
13999       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
14000       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
14001       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
14002     }
14003   }
14004 
14005   // fold (fmul X, 2.0) -> (fadd X, X)
14006   if (N1CFP && N1CFP->isExactlyValue(+2.0))
14007     return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
14008 
14009   // fold (fmul X, -1.0) -> (fsub -0.0, X)
14010   if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
14011     if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
14012       return DAG.getNode(ISD::FSUB, DL, VT,
14013                          DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
14014     }
14015   }
14016 
14017   // -N0 * -N1 --> N0 * N1
14018   TargetLowering::NegatibleCost CostN0 =
14019       TargetLowering::NegatibleCost::Expensive;
14020   TargetLowering::NegatibleCost CostN1 =
14021       TargetLowering::NegatibleCost::Expensive;
14022   SDValue NegN0 =
14023       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14024   SDValue NegN1 =
14025       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14026   if (NegN0 && NegN1 &&
14027       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14028        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14029     return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
14030 
14031   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
14032   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
14033   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
14034       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
14035       TLI.isOperationLegal(ISD::FABS, VT)) {
14036     SDValue Select = N0, X = N1;
14037     if (Select.getOpcode() != ISD::SELECT)
14038       std::swap(Select, X);
14039 
14040     SDValue Cond = Select.getOperand(0);
14041     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
14042     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
14043 
14044     if (TrueOpnd && FalseOpnd &&
14045         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
14046         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
14047         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
14048       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14049       switch (CC) {
14050       default: break;
14051       case ISD::SETOLT:
14052       case ISD::SETULT:
14053       case ISD::SETOLE:
14054       case ISD::SETULE:
14055       case ISD::SETLT:
14056       case ISD::SETLE:
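        // For the 'less than' predicates, swapping the select arms converts
        // the pattern into the equivalent 'greater than' form handled below.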
14057         std::swap(TrueOpnd, FalseOpnd);
14058         LLVM_FALLTHROUGH;
14059       case ISD::SETOGT:
14060       case ISD::SETUGT:
14061       case ISD::SETOGE:
14062       case ISD::SETUGE:
14063       case ISD::SETGT:
14064       case ISD::SETGE:
14065         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
14066             TLI.isOperationLegal(ISD::FNEG, VT))
14067           return DAG.getNode(ISD::FNEG, DL, VT,
14068                    DAG.getNode(ISD::FABS, DL, VT, X));
14069         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
14070           return DAG.getNode(ISD::FABS, DL, VT, X);
14071 
14072         break;
14073       }
14074     }
14075   }
14076 
14077   // FMUL -> FMA combines:
14078   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
14079     AddToWorklist(Fused.getNode());
14080     return Fused;
14081   }
14082 
14083   return SDValue();
14084 }
14085 
14086 SDValue DAGCombiner::visitFMA(SDNode *N) {
14087   SDValue N0 = N->getOperand(0);
14088   SDValue N1 = N->getOperand(1);
14089   SDValue N2 = N->getOperand(2);
14090   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14091   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14092   EVT VT = N->getValueType(0);
14093   SDLoc DL(N);
14094   const TargetOptions &Options = DAG.getTarget().Options;
14095   // FMA nodes have flags that propagate to the created nodes.
14096   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14097 
14098   bool UnsafeFPMath =
14099       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14100 
14101   // Constant fold FMA.
14102   if (isa<ConstantFPSDNode>(N0) &&
14103       isa<ConstantFPSDNode>(N1) &&
14104       isa<ConstantFPSDNode>(N2)) {
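    // getNode constant folds ISD::FMA when all three operands are ConstantFP.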
14105     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
14106   }
14107 
14108   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
14109   TargetLowering::NegatibleCost CostN0 =
14110       TargetLowering::NegatibleCost::Expensive;
14111   TargetLowering::NegatibleCost CostN1 =
14112       TargetLowering::NegatibleCost::Expensive;
14113   SDValue NegN0 =
14114       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14115   SDValue NegN1 =
14116       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14117   if (NegN0 && NegN1 &&
14118       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14119        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14120     return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
14121 
14122   if (UnsafeFPMath) {
14123     if (N0CFP && N0CFP->isZero())
14124       return N2;
14125     if (N1CFP && N1CFP->isZero())
14126       return N2;
14127   }
14128 
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, DL, VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
14133 
14134   // Canonicalize (fma c, x, y) -> (fma x, c, y)
14135   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14136      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14137     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
14138 
14139   if (UnsafeFPMath) {
14140     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
14141     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
14142         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14143         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
14144       return DAG.getNode(ISD::FMUL, DL, VT, N0,
14145                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
14146     }
14147 
14148     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
14149     if (N0.getOpcode() == ISD::FMUL &&
14150         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14151         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14152       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14153                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
14154                          N2);
14155     }
14156   }
14157 
  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
14159   if (N1CFP) {
14160     if (N1CFP->isExactlyValue(1.0))
14161       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
14162 
14163     if (N1CFP->isExactlyValue(-1.0) &&
14164         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
14165       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
14166       AddToWorklist(RHSNeg.getNode());
14167       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
14168     }
14169 
    // fma (fneg x), K, y -> fma x, -K, y
14171     if (N0.getOpcode() == ISD::FNEG &&
14172         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14173          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
14174                                               ForCodeSize)))) {
14175       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14176                          DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
14177     }
14178   }
14179 
14180   if (UnsafeFPMath) {
14181     // (fma x, c, x) -> (fmul x, (c+1))
14182     if (N1CFP && N0 == N2) {
14183       return DAG.getNode(
14184           ISD::FMUL, DL, VT, N0,
14185           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
14186     }
14187 
14188     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
14189     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
14190       return DAG.getNode(
14191           ISD::FMUL, DL, VT, N0,
14192           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
14193     }
14194   }
14195 
14196   // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
14197   // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
14198   if (!TLI.isFNegFree(VT))
14199     if (SDValue Neg = TLI.getCheaperNegatedExpression(
14200             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
14201       return DAG.getNode(ISD::FNEG, DL, VT, Neg);
14202   return SDValue();
14203 }
14204 
14205 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14206 // reciprocal.
14207 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is that different
// targets may have different costs for FDIV and FMUL, so sometimes the cost
// of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
// reason is that the critical path is increased from "one FDIV" to "one FDIV
// + one FMUL".
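// As an illustrative sketch of the effect at the IR level (assuming fast-math
// flags that permit reciprocal formation):
//   %q0 = fdiv fast float %a, %d
//   %q1 = fdiv fast float %b, %d
// becomes:
//   %recip = fdiv fast float 1.0, %d
//   %p0 = fmul fast float %a, %recip
//   %p1 = fmul fast float %b, %recip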
14212 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
14213   // TODO: Limit this transform based on optsize/minsize - it always creates at
14214   //       least 1 extra instruction. But the perf win may be substantial enough
14215   //       that only minsize should restrict this.
14216   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
14217   const SDNodeFlags Flags = N->getFlags();
14218   if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
14219     return SDValue();
14220 
  // Skip if the current node is a reciprocal or a negated reciprocal.
14222   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
14223   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
14224   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
14225     return SDValue();
14226 
14227   // Exit early if the target does not want this transform or if there can't
14228   // possibly be enough uses of the divisor to make the transform worthwhile.
14229   unsigned MinUses = TLI.combineRepeatedFPDivisors();
14230 
14231   // For splat vectors, scale the number of uses by the splat factor. If we can
14232   // convert the division into a scalar op, that will likely be much faster.
14233   unsigned NumElts = 1;
14234   EVT VT = N->getValueType(0);
14235   if (VT.isVector() && DAG.isSplatValue(N1))
14236     NumElts = VT.getVectorNumElements();
14237 
14238   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
14239     return SDValue();
14240 
14241   // Find all FDIV users of the same divisor.
14242   // Use a set because duplicates may be present in the user list.
14243   SetVector<SDNode *> Users;
14244   for (auto *U : N1->uses()) {
14245     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
14246       // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
14247       if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
14248           U->getOperand(0) == U->getOperand(1).getOperand(0) &&
14249           U->getFlags().hasAllowReassociation() &&
14250           U->getFlags().hasNoSignedZeros())
14251         continue;
14252 
14253       // This division is eligible for optimization only if global unsafe math
14254       // is enabled or if this division allows reciprocal formation.
14255       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
14256         Users.insert(U);
14257     }
14258   }
14259 
14260   // Now that we have the actual number of divisor uses, make sure it meets
14261   // the minimum threshold specified by the target.
14262   if ((Users.size() * NumElts) < MinUses)
14263     return SDValue();
14264 
14265   SDLoc DL(N);
14266   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14267   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
14268 
14269   // Dividend / Divisor -> Dividend * Reciprocal
14270   for (auto *U : Users) {
14271     SDValue Dividend = U->getOperand(0);
14272     if (Dividend != FPOne) {
14273       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
14274                                     Reciprocal, Flags);
14275       CombineTo(U, NewNode);
14276     } else if (U != Reciprocal.getNode()) {
14277       // In the absence of fast-math-flags, this user node is always the
14278       // same node as Reciprocal, but with FMF they may be different nodes.
14279       CombineTo(U, Reciprocal);
14280     }
14281   }
14282   return SDValue(N, 0);  // N was replaced.
14283 }
14284 
14285 SDValue DAGCombiner::visitFDIV(SDNode *N) {
14286   SDValue N0 = N->getOperand(0);
14287   SDValue N1 = N->getOperand(1);
14288   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14289   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14290   EVT VT = N->getValueType(0);
14291   SDLoc DL(N);
14292   const TargetOptions &Options = DAG.getTarget().Options;
14293   SDNodeFlags Flags = N->getFlags();
14294   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14295 
14296   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14297     return R;
14298 
14299   // fold vector ops
14300   if (VT.isVector())
14301     if (SDValue FoldedVOp = SimplifyVBinOp(N))
14302       return FoldedVOp;
14303 
14304   // fold (fdiv c1, c2) -> c1/c2
14305   if (N0CFP && N1CFP)
14306     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
14307 
14308   if (SDValue NewSel = foldBinOpIntoSelect(N))
14309     return NewSel;
14310 
14311   if (SDValue V = combineRepeatedFPDivisors(N))
14312     return V;
14313 
14314   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
14315     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
14316     if (N1CFP) {
14317       // Compute the reciprocal 1.0 / c2.
14318       const APFloat &N1APF = N1CFP->getValueAPF();
14319       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
14320       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
14321       // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (e.g., NaN, denormal, ...).
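      // E.g., 1.0/4.0 folds exactly (opOK) and 1.0/3.0 folds with rounding
      // (opInexact), while a 0.0 divisor yields opDivByZero and is rejected.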
14323       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
14324           (!LegalOperations ||
14325            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
14326            // backend)... we should handle this gracefully after Legalize.
14327            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
14328            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14329            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
14330         return DAG.getNode(ISD::FMUL, DL, VT, N0,
14331                            DAG.getConstantFP(Recip, DL, VT));
14332     }
14333 
14334     // If this FDIV is part of a reciprocal square root, it may be folded
14335     // into a target-specific square root estimate instruction.
14336     if (N1.getOpcode() == ISD::FSQRT) {
14337       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
14338         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14339     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
14340                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14341       if (SDValue RV =
14342               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14343         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
14344         AddToWorklist(RV.getNode());
14345         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14346       }
14347     } else if (N1.getOpcode() == ISD::FP_ROUND &&
14348                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14349       if (SDValue RV =
14350               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14351         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
14352         AddToWorklist(RV.getNode());
14353         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14354       }
14355     } else if (N1.getOpcode() == ISD::FMUL) {
14356       // Look through an FMUL. Even though this won't remove the FDIV directly,
14357       // it's still worthwhile to get rid of the FSQRT if possible.
14358       SDValue Sqrt, Y;
14359       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14360         Sqrt = N1.getOperand(0);
14361         Y = N1.getOperand(1);
14362       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
14363         Sqrt = N1.getOperand(1);
14364         Y = N1.getOperand(0);
14365       }
14366       if (Sqrt.getNode()) {
14367         // If the other multiply operand is known positive, pull it into the
14368         // sqrt. That will eliminate the division if we convert to an estimate.
14369         if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
14370             N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
14371           SDValue A;
14372           if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
14373             A = Y.getOperand(0);
14374           else if (Y == Sqrt.getOperand(0))
14375             A = Y;
14376           if (A) {
14377             // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
14378             // X / (A * sqrt(A))       --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
14379             SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
14380             SDValue AAZ =
14381                 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
14382             if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
14383               return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
14384 
14385             // Estimate creation failed. Clean up speculatively created nodes.
14386             recursivelyDeleteUnusedNodes(AAZ.getNode());
14387           }
14388         }
14389 
        // We found an FSQRT, so try to make this fold:
14391         // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
14392         if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
14393           SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
14394           AddToWorklist(Div.getNode());
14395           return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
14396         }
14397       }
14398     }
14399 
14400     // Fold into a reciprocal estimate and multiply instead of a real divide.
14401     if (Options.NoInfsFPMath || Flags.hasNoInfs())
14402       if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
14403         return RV;
14404   }
14405 
14406   // Fold X/Sqrt(X) -> Sqrt(X)
14407   if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
14408       (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
14409     if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
14410       return N1;
14411 
14412   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
14413   TargetLowering::NegatibleCost CostN0 =
14414       TargetLowering::NegatibleCost::Expensive;
14415   TargetLowering::NegatibleCost CostN1 =
14416       TargetLowering::NegatibleCost::Expensive;
14417   SDValue NegN0 =
14418       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14419   SDValue NegN1 =
14420       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14421   if (NegN0 && NegN1 &&
14422       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14423        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14424     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
14425 
14426   return SDValue();
14427 }
14428 
14429 SDValue DAGCombiner::visitFREM(SDNode *N) {
14430   SDValue N0 = N->getOperand(0);
14431   SDValue N1 = N->getOperand(1);
14432   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14433   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14434   EVT VT = N->getValueType(0);
14435   SDNodeFlags Flags = N->getFlags();
14436   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14437 
14438   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14439     return R;
14440 
14441   // fold (frem c1, c2) -> fmod(c1,c2)
14442   if (N0CFP && N1CFP)
14443     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
14444 
14445   if (SDValue NewSel = foldBinOpIntoSelect(N))
14446     return NewSel;
14447 
14448   return SDValue();
14449 }
14450 
14451 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
14452   SDNodeFlags Flags = N->getFlags();
14453   const TargetOptions &Options = DAG.getTarget().Options;
14454 
14455   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
14456   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
14457   if (!Flags.hasApproximateFuncs() ||
14458       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
14459     return SDValue();
14460 
14461   SDValue N0 = N->getOperand(0);
14462   if (TLI.isFsqrtCheap(N0, DAG))
14463     return SDValue();
14464 
14465   // FSQRT nodes have flags that propagate to the created nodes.
14466   // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
14467   //       transform the fdiv, we may produce a sub-optimal estimate sequence
14468   //       because the reciprocal calculation may not have to filter out a
14469   //       0.0 input.
14470   return buildSqrtEstimate(N0, Flags);
14471 }
14472 
14473 /// copysign(x, fp_extend(y)) -> copysign(x, y)
14474 /// copysign(x, fp_round(y)) -> copysign(x, y)
14475 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
14476   SDValue N1 = N->getOperand(1);
14477   if ((N1.getOpcode() == ISD::FP_EXTEND ||
14478        N1.getOpcode() == ISD::FP_ROUND)) {
14479     EVT N1VT = N1->getValueType(0);
14480     EVT N1Op0VT = N1->getOperand(0).getValueType();
14481 
14482     // Always fold no-op FP casts.
14483     if (N1VT == N1Op0VT)
14484       return true;
14485 
    // Do not optimize out the f128 type conversion yet.
    // Some targets, such as x86_64, are configured to keep an f128 value in
    // one SSE register, but instruction selection cannot yet handle FCOPYSIGN
    // on SSE registers.
14490     if (N1Op0VT == MVT::f128)
14491       return false;
14492 
14493     // Avoid mismatched vector operand types, for better instruction selection.
14494     if (N1Op0VT.isVector())
14495       return false;
14496 
14497     return true;
14498   }
14499   return false;
14500 }
14501 
14502 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
14503   SDValue N0 = N->getOperand(0);
14504   SDValue N1 = N->getOperand(1);
14505   bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
14506   bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
14507   EVT VT = N->getValueType(0);
14508 
14509   if (N0CFP && N1CFP) // Constant fold
14510     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
14511 
14512   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
14513     const APFloat &V = N1C->getValueAPF();
14514     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
14515     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
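    // E.g., copysign(x, 2.0) --> fabs(x), and copysign(x, -0.5) --> -fabs(x).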
14516     if (!V.isNegative()) {
14517       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
14518         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14519     } else {
14520       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14521         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
14522                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
14523     }
14524   }
14525 
14526   // copysign(fabs(x), y) -> copysign(x, y)
14527   // copysign(fneg(x), y) -> copysign(x, y)
14528   // copysign(copysign(x,z), y) -> copysign(x, y)
14529   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
14530       N0.getOpcode() == ISD::FCOPYSIGN)
14531     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
14532 
14533   // copysign(x, abs(y)) -> abs(x)
14534   if (N1.getOpcode() == ISD::FABS)
14535     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14536 
14537   // copysign(x, copysign(y,z)) -> copysign(x, z)
14538   if (N1.getOpcode() == ISD::FCOPYSIGN)
14539     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
14540 
14541   // copysign(x, fp_extend(y)) -> copysign(x, y)
14542   // copysign(x, fp_round(y)) -> copysign(x, y)
14543   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
14544     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
14545 
14546   return SDValue();
14547 }
14548 
14549 SDValue DAGCombiner::visitFPOW(SDNode *N) {
14550   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
14551   if (!ExponentC)
14552     return SDValue();
14553   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14554 
14555   // Try to convert x ** (1/3) into cube root.
14556   // TODO: Handle the various flavors of long double.
14557   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14558   //       Some range near 1/3 should be fine.
14559   EVT VT = N->getValueType(0);
14560   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
14561       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14562     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14563     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) = -cbrt(val).
14565     // For regular numbers, rounding may cause the results to differ.
14566     // Therefore, we require { nsz ninf nnan afn } for this transform.
14567     // TODO: We could select out the special cases if we don't have nsz/ninf.
14568     SDNodeFlags Flags = N->getFlags();
14569     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
14570         !Flags.hasApproximateFuncs())
14571       return SDValue();
14572 
14573     // Do not create a cbrt() libcall if the target does not have it, and do not
14574     // turn a pow that has lowering support into a cbrt() libcall.
14575     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
14576         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
14577          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
14578       return SDValue();
14579 
14580     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14581   }
14582 
14583   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
14584   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14585   // TODO: This could be extended (using a target hook) to handle smaller
14586   // power-of-2 fractional exponents.
14587   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14588   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
14589   if (ExponentIs025 || ExponentIs075) {
14590     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
14591     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
14592     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
14593     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
14594     // For regular numbers, rounding may cause the results to differ.
14595     // Therefore, we require { nsz ninf afn } for this transform.
14596     // TODO: We could select out the special cases if we don't have nsz/ninf.
14597     SDNodeFlags Flags = N->getFlags();
14598 
14599     // We only need no signed zeros for the 0.25 case.
14600     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
14601         !Flags.hasApproximateFuncs())
14602       return SDValue();
14603 
14604     // Don't double the number of libcalls. We are trying to inline fast code.
14605     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
14606       return SDValue();
14607 
14608     // Assume that libcalls are the smallest code.
14609     // TODO: This restriction should probably be lifted for vectors.
14610     if (ForCodeSize)
14611       return SDValue();
14612 
14613     // pow(X, 0.25) --> sqrt(sqrt(X))
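    // (Numeric sanity check, illustrative only: pow(16.0, 0.25) = 2.0, and
    // sqrt(sqrt(16.0)) = sqrt(4.0) = 2.0.)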
14614     SDLoc DL(N);
14615     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
14616     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
14617     if (ExponentIs025)
14618       return SqrtSqrt;
14619     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
14620     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
14621   }
14622 
14623   return SDValue();
14624 }
14625 
14626 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
14627                                const TargetLowering &TLI) {
14628   // This optimization is guarded by a function attribute because it may produce
  // unexpected results. I.e., programs may be relying on the platform-specific
14630   // undefined behavior when the float-to-int conversion overflows.
14631   const Function &F = DAG.getMachineFunction().getFunction();
14632   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
14633   if (StrictOverflow.getValueAsString().equals("false"))
14634     return SDValue();
14635 
14636   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
14637   // replacing casts with a libcall. We also must be allowed to ignore -0.0
  // because FTRUNC will return -0.0 for inputs in (-1.0, -0.0), but using
  // integer conversions would return +0.0.
14640   // FIXME: We should be able to use node-level FMF here.
14641   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
14642   EVT VT = N->getValueType(0);
14643   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
14644       !DAG.getTarget().Options.NoSignedZerosFPMath)
14645     return SDValue();
14646 
14647   // fptosi/fptoui round towards zero, so converting from FP to integer and
14648   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
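  // E.g., for X = 3.7, (double)(int64_t)3.7 == 3.0 == trunc(3.7); likewise
  // for X = -3.7, (double)(int64_t)-3.7 == -3.0 == trunc(-3.7).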
14649   SDValue N0 = N->getOperand(0);
14650   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
14651       N0.getOperand(0).getValueType() == VT)
14652     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14653 
14654   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
14655       N0.getOperand(0).getValueType() == VT)
14656     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14657 
14658   return SDValue();
14659 }
14660 
14661 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
14662   SDValue N0 = N->getOperand(0);
14663   EVT VT = N->getValueType(0);
14664   EVT OpVT = N0.getValueType();
14665 
14666   // [us]itofp(undef) = 0, because the result value is bounded.
14667   if (N0.isUndef())
14668     return DAG.getConstantFP(0.0, SDLoc(N), VT);
14669 
14670   // fold (sint_to_fp c1) -> c1fp
14671   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14672       // ...but only if the target supports immediate floating-point values
14673       (!LegalOperations ||
14674        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14675     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14676 
14677   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
14678   // but UINT_TO_FP is legal on this target, try to convert.
14679   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
14680       hasOperation(ISD::UINT_TO_FP, OpVT)) {
14681     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
14682     if (DAG.SignBitIsZero(N0))
14683       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14684   }
14685 
14686   // The next optimizations are desirable only if SELECT_CC can be lowered.
14687   // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
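  // (sint_to_fp interprets the i1 bit 1 as the signed value -1, hence -1.0.)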
14688   if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
14689       !VT.isVector() &&
14690       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14691     SDLoc DL(N);
14692     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
14693                          DAG.getConstantFP(0.0, DL, VT));
14694   }
14695 
14696   // fold (sint_to_fp (zext (setcc x, y, cc))) ->
14697   //      (select (setcc x, y, cc), 1.0, 0.0)
14698   if (N0.getOpcode() == ISD::ZERO_EXTEND &&
14699       N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
14700       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14701     SDLoc DL(N);
14702     return DAG.getSelect(DL, VT, N0.getOperand(0),
14703                          DAG.getConstantFP(1.0, DL, VT),
14704                          DAG.getConstantFP(0.0, DL, VT));
14705   }
14706 
14707   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14708     return FTrunc;
14709 
14710   return SDValue();
14711 }
14712 
14713 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
14714   SDValue N0 = N->getOperand(0);
14715   EVT VT = N->getValueType(0);
14716   EVT OpVT = N0.getValueType();
14717 
14718   // [us]itofp(undef) = 0, because the result value is bounded.
14719   if (N0.isUndef())
14720     return DAG.getConstantFP(0.0, SDLoc(N), VT);
14721 
14722   // fold (uint_to_fp c1) -> c1fp
14723   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14724       // ...but only if the target supports immediate floating-point values
14725       (!LegalOperations ||
14726        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14727     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14728 
14729   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
14730   // but SINT_TO_FP is legal on this target, try to convert.
14731   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
14732       hasOperation(ISD::SINT_TO_FP, OpVT)) {
14733     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
14734     if (DAG.SignBitIsZero(N0))
14735       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14736   }
14737 
14738   // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
14739   if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
14740       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14741     SDLoc DL(N);
14742     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
14743                          DAG.getConstantFP(0.0, DL, VT));
14744   }
14745 
14746   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14747     return FTrunc;
14748 
14749   return SDValue();
14750 }
14751 
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
14753 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
14754   SDValue N0 = N->getOperand(0);
14755   EVT VT = N->getValueType(0);
14756 
14757   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
14758     return SDValue();
14759 
14760   SDValue Src = N0.getOperand(0);
14761   EVT SrcVT = Src.getValueType();
14762   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
14763   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
14764 
14765   // We can safely assume the conversion won't overflow the output range,
14766   // because (for example) (uint8_t)18293.f is undefined behavior.
14767 
14768   // Since we can assume the conversion won't overflow, our decision as to
14769   // whether the input will fit in the float should depend on the minimum
14770   // of the input range and output range.
14771 
14772   // This means this is also safe for a signed input and unsigned output, since
14773   // a negative input would lead to undefined behavior.
14774   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
14775   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
14776   unsigned ActualSize = std::min(InputSize, OutputSize);
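  // E.g., for fp_to_sint(sint_to_fp(i16 x)) via f32: InputSize = 15,
  // OutputSize = 15, and f32 carries 24 bits of precision, so the conversions
  // fold away; via f16 (11 bits of precision) they would not.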
14777   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
14778 
14779   // We can only fold away the float conversion if the input range can be
14780   // represented exactly in the float range.
14781   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
14782     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
14783       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
14784                                                        : ISD::ZERO_EXTEND;
14785       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
14786     }
14787     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
14788       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
14789     return DAG.getBitcast(VT, Src);
14790   }
14791   return SDValue();
14792 }
14793 
14794 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
14795   SDValue N0 = N->getOperand(0);
14796   EVT VT = N->getValueType(0);
14797 
14798   // fold (fp_to_sint undef) -> undef
14799   if (N0.isUndef())
14800     return DAG.getUNDEF(VT);
14801 
14802   // fold (fp_to_sint c1fp) -> c1
14803   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14804     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
14805 
14806   return FoldIntToFPToInt(N, DAG);
14807 }
14808 
14809 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
14810   SDValue N0 = N->getOperand(0);
14811   EVT VT = N->getValueType(0);
14812 
14813   // fold (fp_to_uint undef) -> undef
14814   if (N0.isUndef())
14815     return DAG.getUNDEF(VT);
14816 
14817   // fold (fp_to_uint c1fp) -> c1
14818   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14819     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
14820 
14821   return FoldIntToFPToInt(N, DAG);
14822 }
14823 
14824 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
14825   SDValue N0 = N->getOperand(0);
14826   SDValue N1 = N->getOperand(1);
14827   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14828   EVT VT = N->getValueType(0);
14829 
14830   // fold (fp_round c1fp) -> c1fp
14831   if (N0CFP)
14832     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
14833 
14834   // fold (fp_round (fp_extend x)) -> x
14835   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
14836     return N0.getOperand(0);
14837 
14838   // fold (fp_round (fp_round x)) -> (fp_round x)
14839   if (N0.getOpcode() == ISD::FP_ROUND) {
14840     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
14841     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
14842 
14843     // Skip this folding if it results in an fp_round from f80 to f16.
14844     //
14845     // f80 to f16 always generates an expensive (and as yet, unimplemented)
14846     // libcall to __truncxfhf2 instead of selecting native f16 conversion
14847     // instructions from f32 or f64.  Moreover, the first (value-preserving)
14848     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
14849     // x86.
14850     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
14851       return SDValue();
14852 
14853     // If the first fp_round isn't a value preserving truncation, it might
14854     // introduce a tie in the second fp_round, that wouldn't occur in the
14855     // single-step fp_round we want to fold to.
14856     // In other words, double rounding isn't the same as rounding.
14857     // Also, this is a value preserving truncation iff both fp_round's are.
14858     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
14859       SDLoc DL(N);
14860       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
14861                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
14862     }
14863   }
14864 
14865   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
14866   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
14867     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
14868                               N0.getOperand(0), N1);
14869     AddToWorklist(Tmp.getNode());
14870     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
14871                        Tmp, N0.getOperand(1));
14872   }
14873 
14874   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14875     return NewVSel;
14876 
14877   return SDValue();
14878 }
14879 
14880 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
14881   SDValue N0 = N->getOperand(0);
14882   EVT VT = N->getValueType(0);
14883 
  // If this fp_extend is only used by an fp_round, don't fold it here; allow
  // the fp_round(fp_extend) combine to fold the pair instead.
14885   if (N->hasOneUse() &&
14886       N->use_begin()->getOpcode() == ISD::FP_ROUND)
14887     return SDValue();
14888 
14889   // fold (fp_extend c1fp) -> c1fp
14890   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14891     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
14892 
14893   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
14894   if (N0.getOpcode() == ISD::FP16_TO_FP &&
14895       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
14896     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
14897 
  // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
  // value of X.
14900   if (N0.getOpcode() == ISD::FP_ROUND
14901       && N0.getConstantOperandVal(1) == 1) {
14902     SDValue In = N0.getOperand(0);
14903     if (In.getValueType() == VT) return In;
14904     if (VT.bitsLT(In.getValueType()))
14905       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
14906                          In, N0.getOperand(1));
14907     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
14908   }
14909 
14910   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
14911   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14912        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14913     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14914     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
14915                                      LN0->getChain(),
14916                                      LN0->getBasePtr(), N0.getValueType(),
14917                                      LN0->getMemOperand());
14918     CombineTo(N, ExtLoad);
14919     CombineTo(N0.getNode(),
14920               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
14921                           N0.getValueType(), ExtLoad,
14922                           DAG.getIntPtrConstant(1, SDLoc(N0))),
14923               ExtLoad.getValue(1));
14924     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14925   }
14926 
14927   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14928     return NewVSel;
14929 
14930   return SDValue();
14931 }
14932 
14933 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
14934   SDValue N0 = N->getOperand(0);
14935   EVT VT = N->getValueType(0);
14936 
14937   // fold (fceil c1) -> fceil(c1)
14938   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14939     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
14940 
14941   return SDValue();
14942 }
14943 
14944 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
14945   SDValue N0 = N->getOperand(0);
14946   EVT VT = N->getValueType(0);
14947 
14948   // fold (ftrunc c1) -> ftrunc(c1)
14949   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14950     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
14951 
14952   // fold ftrunc (known rounded int x) -> x
  // ftrunc is part of the fptosi/fptoui expansion on some targets, so this
  // pattern is likely to appear when extracting an integer from an
  // already-rounded floating-point value.
14955   switch (N0.getOpcode()) {
14956   default: break;
14957   case ISD::FRINT:
14958   case ISD::FTRUNC:
14959   case ISD::FNEARBYINT:
14960   case ISD::FFLOOR:
14961   case ISD::FCEIL:
14962     return N0;
14963   }
14964 
14965   return SDValue();
14966 }
14967 
14968 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
14969   SDValue N0 = N->getOperand(0);
14970   EVT VT = N->getValueType(0);
14971 
14972   // fold (ffloor c1) -> ffloor(c1)
14973   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14974     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
14975 
14976   return SDValue();
14977 }
14978 
14979 SDValue DAGCombiner::visitFNEG(SDNode *N) {
14980   SDValue N0 = N->getOperand(0);
14981   EVT VT = N->getValueType(0);
14982   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14983 
14984   // Constant fold FNEG.
14985   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14986     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
14987 
14988   if (SDValue NegN0 =
14989           TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
14990     return NegN0;
14991 
14992   // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
14993   // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
14994   // know it was called from a context with a nsz flag if the input fsub does
14995   // not.
14996   if (N0.getOpcode() == ISD::FSUB &&
14997       (DAG.getTarget().Options.NoSignedZerosFPMath ||
14998        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
14999     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
15000                        N0.getOperand(0));
15001   }
15002 
15003   if (SDValue Cast = foldSignChangeInBitcast(N))
15004     return Cast;
15005 
15006   return SDValue();
15007 }
15008 
15009 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
15010                             APFloat (*Op)(const APFloat &, const APFloat &)) {
15011   SDValue N0 = N->getOperand(0);
15012   SDValue N1 = N->getOperand(1);
15013   EVT VT = N->getValueType(0);
15014   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
15015   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
15016   const SDNodeFlags Flags = N->getFlags();
15017   unsigned Opc = N->getOpcode();
15018   bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
15019   bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
15020   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15021 
15022   if (N0CFP && N1CFP) {
15023     const APFloat &C0 = N0CFP->getValueAPF();
15024     const APFloat &C1 = N1CFP->getValueAPF();
15025     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
15026   }
15027 
15028   // Canonicalize to constant on RHS.
15029   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
15030       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
15031     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
15032 
15033   if (N1CFP) {
15034     const APFloat &AF = N1CFP->getValueAPF();
15035 
15036     // minnum(X, nan) -> X
15037     // maxnum(X, nan) -> X
15038     // minimum(X, nan) -> nan
15039     // maximum(X, nan) -> nan
15040     if (AF.isNaN())
15041       return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
15042 
15043     // In the following folds, inf can be replaced with the largest finite
15044     // float, if the ninf flag is set.
15045     if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
15046       // minnum(X, -inf) -> -inf
15047       // maxnum(X, +inf) -> +inf
15048       // minimum(X, -inf) -> -inf if nnan
15049       // maximum(X, +inf) -> +inf if nnan
15050       if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
15051         return N->getOperand(1);
15052 
15053       // minnum(X, +inf) -> X if nnan
15054       // maxnum(X, -inf) -> X if nnan
15055       // minimum(X, +inf) -> X
15056       // maximum(X, -inf) -> X
15057       if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
15058         return N->getOperand(0);
15059     }
15060   }
15061 
15062   return SDValue();
15063 }
15064 
15065 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
15066   return visitFMinMax(DAG, N, minnum);
15067 }
15068 
15069 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
15070   return visitFMinMax(DAG, N, maxnum);
15071 }
15072 
15073 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
15074   return visitFMinMax(DAG, N, minimum);
15075 }
15076 
15077 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
15078   return visitFMinMax(DAG, N, maximum);
15079 }
15080 
15081 SDValue DAGCombiner::visitFABS(SDNode *N) {
15082   SDValue N0 = N->getOperand(0);
15083   EVT VT = N->getValueType(0);
15084 
15085   // fold (fabs c1) -> fabs(c1)
15086   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15087     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15088 
15089   // fold (fabs (fabs x)) -> (fabs x)
15090   if (N0.getOpcode() == ISD::FABS)
15091     return N->getOperand(0);
15092 
15093   // fold (fabs (fneg x)) -> (fabs x)
15094   // fold (fabs (fcopysign x, y)) -> (fabs x)
15095   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15096     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15097 
15098   if (SDValue Cast = foldSignChangeInBitcast(N))
15099     return Cast;
15100 
15101   return SDValue();
15102 }
15103 
15104 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15105   SDValue Chain = N->getOperand(0);
15106   SDValue N1 = N->getOperand(1);
15107   SDValue N2 = N->getOperand(2);
15108 
15109   // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15110   // nondeterministic jumps).
15111   if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15112     return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15113                        N1->getOperand(0), N2);
15114   }
15115 
15116   // If N is a constant we could fold this into a fallthrough or unconditional
15117   // branch. However that doesn't happen very often in normal code, because
15118   // Instcombine/SimplifyCFG should have handled the available opportunities.
15119   // If we did this folding here, it would be necessary to update the
15120   // MachineBasicBlock CFG, which is awkward.
15121 
15122   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
15123   // on the target.
15124   if (N1.getOpcode() == ISD::SETCC &&
15125       TLI.isOperationLegalOrCustom(ISD::BR_CC,
15126                                    N1.getOperand(0).getValueType())) {
15127     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15128                        Chain, N1.getOperand(2),
15129                        N1.getOperand(0), N1.getOperand(1), N2);
15130   }
15131 
15132   if (N1.hasOneUse()) {
15133     // rebuildSetCC calls visitXor which may change the Chain when there is a
15134     // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
15135     HandleSDNode ChainHandle(Chain);
15136     if (SDValue NewN1 = rebuildSetCC(N1))
15137       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
15138                          ChainHandle.getValue(), NewN1, N2);
15139   }
15140 
15141   return SDValue();
15142 }
15143 
15144 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
15145   if (N.getOpcode() == ISD::SRL ||
15146       (N.getOpcode() == ISD::TRUNCATE &&
15147        (N.getOperand(0).hasOneUse() &&
15148         N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
15150     if (N.getOpcode() == ISD::TRUNCATE)
15151       N = N.getOperand(0);
15152 
15153     // Match this pattern so that we can generate simpler code:
15154     //
15155     //   %a = ...
15156     //   %b = and i32 %a, 2
15157     //   %c = srl i32 %b, 1
15158     //   brcond i32 %c ...
15159     //
15160     // into
15161     //
15162     //   %a = ...
15163     //   %b = and i32 %a, 2
15164     //   %c = setcc eq %b, 0
15165     //   brcond %c ...
15166     //
15167     // This applies only when the AND constant value has one bit set and the
15168     // SRL constant is equal to the log2 of the AND constant. The back-end is
15169     // smart enough to convert the result into a TEST/JMP sequence.
15170     SDValue Op0 = N.getOperand(0);
15171     SDValue Op1 = N.getOperand(1);
15172 
15173     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
15174       SDValue AndOp1 = Op0.getOperand(1);
15175 
15176       if (AndOp1.getOpcode() == ISD::Constant) {
15177         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
15178 
15179         if (AndConst.isPowerOf2() &&
15180             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
15181           SDLoc DL(N);
15182           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
15183                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
15184                               ISD::SETNE);
15185         }
15186       }
15187     }
15188   }
15189 
15190   // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
15191   // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
15192   if (N.getOpcode() == ISD::XOR) {
15193     // Because we may call this on a speculatively constructed
15194     // SimplifiedSetCC Node, we need to simplify this node first.
15195     // Ideally this should be folded into SimplifySetCC and not
15196     // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
15198     HandleSDNode XORHandle(N);
15199     while (N.getOpcode() == ISD::XOR) {
15200       SDValue Tmp = visitXOR(N.getNode());
15201       // No simplification done.
15202       if (!Tmp.getNode())
15203         break;
      // Returning N is a form of in-visit replacement that may have
      // invalidated N. Grab the value from the handle.
15206       if (Tmp.getNode() == N.getNode())
15207         N = XORHandle.getValue();
15208       else // Node simplified. Try simplifying again.
15209         N = Tmp;
15210     }
15211 
15212     if (N.getOpcode() != ISD::XOR)
15213       return N;
15214 
15215     SDValue Op0 = N->getOperand(0);
15216     SDValue Op1 = N->getOperand(1);
15217 
15218     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
15219       bool Equal = false;
15220       // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
15221       if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
15222           Op0.getValueType() == MVT::i1) {
15223         N = Op0;
15224         Op0 = N->getOperand(0);
15225         Op1 = N->getOperand(1);
15226         Equal = true;
15227       }
15228 
15229       EVT SetCCVT = N.getValueType();
15230       if (LegalTypes)
15231         SetCCVT = getSetCCResultType(SetCCVT);
15232       // Replace the uses of XOR with SETCC
15233       return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
15234                           Equal ? ISD::SETEQ : ISD::SETNE);
15235     }
15236   }
15237 
15238   return SDValue();
15239 }
15240 
15241 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
15242 //
15243 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
15244   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
15245   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
15246 
15247   // If N is a constant we could fold this into a fallthrough or unconditional
15248   // branch. However that doesn't happen very often in normal code, because
15249   // Instcombine/SimplifyCFG should have handled the available opportunities.
15250   // If we did this folding here, it would be necessary to update the
15251   // MachineBasicBlock CFG, which is awkward.
15252 
15253   // Use SimplifySetCC to simplify SETCC's.
15254   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
15255                                CondLHS, CondRHS, CC->get(), SDLoc(N),
15256                                false);
15257   if (Simp.getNode()) AddToWorklist(Simp.getNode());
15258 
15259   // fold to a simpler setcc
15260   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
15261     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15262                        N->getOperand(0), Simp.getOperand(2),
15263                        Simp.getOperand(0), Simp.getOperand(1),
15264                        N->getOperand(4));
15265 
15266   return SDValue();
15267 }
15268 
15269 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
15270                                      bool &IsLoad, bool &IsMasked, SDValue &Ptr,
15271                                      const TargetLowering &TLI) {
15272   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15273     if (LD->isIndexed())
15274       return false;
15275     EVT VT = LD->getMemoryVT();
15276     if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
15277       return false;
15278     Ptr = LD->getBasePtr();
15279   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15280     if (ST->isIndexed())
15281       return false;
15282     EVT VT = ST->getMemoryVT();
15283     if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
15284       return false;
15285     Ptr = ST->getBasePtr();
15286     IsLoad = false;
15287   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15288     if (LD->isIndexed())
15289       return false;
15290     EVT VT = LD->getMemoryVT();
15291     if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
15292         !TLI.isIndexedMaskedLoadLegal(Dec, VT))
15293       return false;
15294     Ptr = LD->getBasePtr();
15295     IsMasked = true;
15296   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
15297     if (ST->isIndexed())
15298       return false;
15299     EVT VT = ST->getMemoryVT();
15300     if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
15301         !TLI.isIndexedMaskedStoreLegal(Dec, VT))
15302       return false;
15303     Ptr = ST->getBasePtr();
15304     IsLoad = false;
15305     IsMasked = true;
15306   } else {
15307     return false;
15308   }
15309   return true;
15310 }
15311 
15312 /// Try turning a load/store into a pre-indexed load/store when the base
15313 /// pointer is an add or subtract and it has other uses besides the load/store.
15314 /// After the transformation, the new indexed load/store has effectively folded
15315 /// the add/subtract in and all of its other uses are redirected to the
15316 /// new load/store.
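/// For example (illustrative, ARM-style assembly), the sequence
///   add r1, r1, #4
///   ldr r0, [r1]
/// can become the pre-indexed load
///   ldr r0, [r1, #4]!
/// where r1 is updated to r1+4 as part of the load and other users of the
/// incremented pointer are rewritten to use the written-back value.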
15317 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15318   if (Level < AfterLegalizeDAG)
15319     return false;
15320 
15321   bool IsLoad = true;
15322   bool IsMasked = false;
15323   SDValue Ptr;
15324   if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
15325                                 Ptr, TLI))
15326     return false;
15327 
15328   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15329   // out.  There is no reason to make this a preinc/predec.
15330   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
15331       Ptr.getNode()->hasOneUse())
15332     return false;
15333 
15334   // Ask the target to do addressing mode selection.
15335   SDValue BasePtr;
15336   SDValue Offset;
15337   ISD::MemIndexedMode AM = ISD::UNINDEXED;
15338   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15339     return false;
15340 
15341   // Backends without true r+i pre-indexed forms may need to pass a
15342   // constant base with a variable offset so that constant coercion
15343   // will work with the patterns in canonical form.
15344   bool Swapped = false;
15345   if (isa<ConstantSDNode>(BasePtr)) {
15346     std::swap(BasePtr, Offset);
15347     Swapped = true;
15348   }
15349 
  // Don't create an indexed load / store with zero offset.
15351   if (isNullConstant(Offset))
15352     return false;
15353 
15354   // Try turning it into a pre-indexed load / store except when:
15355   // 1) The new base ptr is a frame index.
15356   // 2) If N is a store and the new base ptr is either the same as or is a
15357   //    predecessor of the value being stored.
15358   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
15359   //    that would create a cycle.
15360   // 4) All uses are load / store ops that use it as old base ptr.
15361 
15362   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
15363   // (plus the implicit offset) to a register to preinc anyway.
15364   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15365     return false;
15366 
15367   // Check #2.
15368   if (!IsLoad) {
15369     SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
15370                            : cast<StoreSDNode>(N)->getValue();
15371 
15372     // Would require a copy.
15373     if (Val == BasePtr)
15374       return false;
15375 
15376     // Would create a cycle.
15377     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
15378       return false;
15379   }
15380 
15381   // Caches for hasPredecessorHelper.
15382   SmallPtrSet<const SDNode *, 32> Visited;
15383   SmallVector<const SDNode *, 16> Worklist;
15384   Worklist.push_back(N);
15385 
15386   // If the offset is a constant, there may be other adds of constants that
15387   // can be folded with this one. We should do this to avoid having to keep
15388   // a copy of the original base pointer.
15389   SmallVector<SDNode *, 16> OtherUses;
15390   if (isa<ConstantSDNode>(Offset))
15391     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
15392                               UE = BasePtr.getNode()->use_end();
15393          UI != UE; ++UI) {
15394       SDUse &Use = UI.getUse();
15395       // Skip the use that is Ptr and uses of other results from BasePtr's
15396       // node (important for nodes that return multiple results).
15397       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
15398         continue;
15399 
15400       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
15401         continue;
15402 
15403       if (Use.getUser()->getOpcode() != ISD::ADD &&
15404           Use.getUser()->getOpcode() != ISD::SUB) {
15405         OtherUses.clear();
15406         break;
15407       }
15408 
15409       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
15410       if (!isa<ConstantSDNode>(Op1)) {
15411         OtherUses.clear();
15412         break;
15413       }
15414 
15415       // FIXME: In some cases, we can be smarter about this.
15416       if (Op1.getValueType() != Offset.getValueType()) {
15417         OtherUses.clear();
15418         break;
15419       }
15420 
15421       OtherUses.push_back(Use.getUser());
15422     }
15423 
15424   if (Swapped)
15425     std::swap(BasePtr, Offset);
15426 
15427   // Now check for #3 and #4.
15428   bool RealUse = false;
15429 
15430   for (SDNode *Use : Ptr.getNode()->uses()) {
15431     if (Use == N)
15432       continue;
15433     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
15434       return false;
15435 
    // If Ptr may be folded into the addressing mode of another use, then
    // it's not profitable to do this transformation.
15438     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
15439       RealUse = true;
15440   }
15441 
15442   if (!RealUse)
15443     return false;
15444 
15445   SDValue Result;
15446   if (!IsMasked) {
15447     if (IsLoad)
15448       Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15449     else
15450       Result =
15451           DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15452   } else {
15453     if (IsLoad)
15454       Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15455                                         Offset, AM);
15456     else
15457       Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
15458                                          Offset, AM);
15459   }
15460   ++PreIndexedNodes;
15461   ++NodesCombined;
15462   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
15463              Result.getNode()->dump(&DAG); dbgs() << '\n');
15464   WorklistRemover DeadNodes(*this);
15465   if (IsLoad) {
15466     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15467     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15468   } else {
15469     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15470   }
15471 
15472   // Finally, since the node is now dead, remove it from the graph.
15473   deleteAndRecombine(N);
15474 
15475   if (Swapped)
15476     std::swap(BasePtr, Offset);
15477 
  // Replace the other constant-offset uses of BasePtr so they use the
  // updated base value instead.
15479   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
15480     unsigned OffsetIdx = 1;
15481     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
15482       OffsetIdx = 0;
15483     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
15484            BasePtr.getNode() && "Expected BasePtr operand");
15485 
15486     // We need to replace ptr0 in the following expression:
15487     //   x0 * offset0 + y0 * ptr0 = t0
15488     // knowing that
15489     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
15490     //
15491     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
15492     // indexed load/store and the expression that needs to be re-written.
15493     //
15494     // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
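    //
    // For example (numbers purely illustrative): with AM == ISD::PRE_INC,
    // if the indexed node computes t1 = ptr0 + 4 (x1 = y1 = 1) and this use
    // is t0 = ptr0 + 12 (x0 = y0 = 1), then Opcode is ISD::ADD and
    // CNV = 12 - 4 = 8, i.e. t0 is rewritten as t1 + 8.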
15496 
15497     auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
15498     const APInt &Offset0 = CN->getAPIntValue();
15499     const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
15500     int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
15501     int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
15502     int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
15503     int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
15504 
15505     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
15506 
15507     APInt CNV = Offset0;
15508     if (X0 < 0) CNV = -CNV;
15509     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
15510     else CNV = CNV - Offset1;
15511 
15512     SDLoc DL(OtherUses[i]);
15513 
15514     // We can now generate the new expression.
15515     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
15516     SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
15517 
15518     SDValue NewUse = DAG.getNode(Opcode,
15519                                  DL,
15520                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
15521     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
15522     deleteAndRecombine(OtherUses[i]);
15523   }
15524 
15525   // Replace the uses of Ptr with uses of the updated base value.
15526   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
15527   deleteAndRecombine(Ptr.getNode());
15528   AddToWorklist(Result.getNode());
15529 
15530   return true;
15531 }
15532 
15533 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
15534                                    SDValue &BasePtr, SDValue &Offset,
15535                                    ISD::MemIndexedMode &AM,
15536                                    SelectionDAG &DAG,
15537                                    const TargetLowering &TLI) {
15538   if (PtrUse == N ||
15539       (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
15540     return false;
15541 
15542   if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
15543     return false;
15544 
  // Don't create an indexed load / store with zero offset.
15546   if (isNullConstant(Offset))
15547     return false;
15548 
15549   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15550     return false;
15551 
15552   SmallPtrSet<const SDNode *, 32> Visited;
15553   for (SDNode *Use : BasePtr.getNode()->uses()) {
15554     if (Use == Ptr.getNode())
15555       continue;
15556 
    // Don't combine if a later memory user could perform the indexing instead.
15558     if (isa<MemSDNode>(Use)) {
15559       bool IsLoad = true;
15560       bool IsMasked = false;
15561       SDValue OtherPtr;
15562       if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15563                                    IsMasked, OtherPtr, TLI)) {
15564         SmallVector<const SDNode *, 2> Worklist;
15565         Worklist.push_back(Use);
15566         if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
15567           return false;
15568       }
15569     }
15570 
15571     // If all the uses are load / store addresses, then don't do the
15572     // transformation.
15573     if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
15574       for (SDNode *UseUse : Use->uses())
15575         if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
15576           return false;
15577     }
15578   }
15579   return true;
15580 }
15581 
15582 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
15583                                          bool &IsMasked, SDValue &Ptr,
15584                                          SDValue &BasePtr, SDValue &Offset,
15585                                          ISD::MemIndexedMode &AM,
15586                                          SelectionDAG &DAG,
15587                                          const TargetLowering &TLI) {
15588   if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15589                                 IsMasked, Ptr, TLI) ||
15590       Ptr.getNode()->hasOneUse())
15591     return nullptr;
15592 
  // Try turning it into a post-indexed load / store except when
  // 1) All uses are load / store ops that use it as base ptr (and
  //    it may be folded into the addressing mode).
15596   // 2) Op must be independent of N, i.e. Op is neither a predecessor
15597   //    nor a successor of N. Otherwise, if Op is folded that would
15598   //    create a cycle.
15599   for (SDNode *Op : Ptr->uses()) {
15600     // Check for #1.
15601     if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
15602       continue;
15603 
15604     // Check for #2.
15605     SmallPtrSet<const SDNode *, 32> Visited;
15606     SmallVector<const SDNode *, 8> Worklist;
15607     // Ptr is predecessor to both N and Op.
15608     Visited.insert(Ptr.getNode());
15609     Worklist.push_back(N);
15610     Worklist.push_back(Op);
15611     if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
15612         !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
15613       return Op;
15614   }
15615   return nullptr;
15616 }
15617 
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folds the add/subtract into the
/// new indexed load/store, and all uses of the add/subtract are redirected to
/// the new load/store.
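///
/// For example (illustrative only):
///   x = load ptr;  ptr2 = add ptr, 4
/// may become
///   x, ptr2 = post_inc_load ptr, 4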
15622 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
15623   if (Level < AfterLegalizeDAG)
15624     return false;
15625 
15626   bool IsLoad = true;
15627   bool IsMasked = false;
15628   SDValue Ptr;
15629   SDValue BasePtr;
15630   SDValue Offset;
15631   ISD::MemIndexedMode AM = ISD::UNINDEXED;
15632   SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
15633                                          Offset, AM, DAG, TLI);
15634   if (!Op)
15635     return false;
15636 
15637   SDValue Result;
15638   if (!IsMasked)
15639     Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15640                                          Offset, AM)
15641                     : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
15642                                           BasePtr, Offset, AM);
15643   else
15644     Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
15645                                                BasePtr, Offset, AM)
15646                     : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
15647                                                 BasePtr, Offset, AM);
15648   ++PostIndexedNodes;
15649   ++NodesCombined;
15650   LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
15651              dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
15652              dbgs() << '\n');
15653   WorklistRemover DeadNodes(*this);
15654   if (IsLoad) {
15655     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15656     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15657   } else {
15658     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15659   }
15660 
15661   // Finally, since the node is now dead, remove it from the graph.
15662   deleteAndRecombine(N);
15663 
  // Replace the uses of Op with uses of the updated base value.
15665   DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
15666                                 Result.getValue(IsLoad ? 1 : 0));
15667   deleteAndRecombine(Op);
15668   return true;
15669 }
15670 
15671 /// Return the base-pointer arithmetic from an indexed \p LD.
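/// e.g. for a pre/post-incremented load this returns (add BasePtr, Offset);
/// for a decremented one, (sub BasePtr, Offset).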
15672 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
15673   ISD::MemIndexedMode AM = LD->getAddressingMode();
15674   assert(AM != ISD::UNINDEXED);
15675   SDValue BP = LD->getOperand(1);
15676   SDValue Inc = LD->getOperand(2);
15677 
15678   // Some backends use TargetConstants for load offsets, but don't expect
15679   // TargetConstants in general ADD nodes. We can convert these constants into
15680   // regular Constants (if the constant is not opaque).
15681   assert((Inc.getOpcode() != ISD::TargetConstant ||
15682           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
15683          "Cannot split out indexing using opaque target constants");
15684   if (Inc.getOpcode() == ISD::TargetConstant) {
15685     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
15686     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
15687                           ConstInc->getValueType(0));
15688   }
15689 
15690   unsigned Opc =
15691       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
15692   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
15693 }
15694 
15695 static inline ElementCount numVectorEltsOrZero(EVT T) {
15696   return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
15697 }
15698 
15699 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
15700   Val = ST->getValue();
15701   EVT STType = Val.getValueType();
15702   EVT STMemType = ST->getMemoryVT();
15703   if (STType == STMemType)
15704     return true;
15705   if (isTypeLegal(STMemType))
15706     return false; // fail.
15707   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
15708       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
15709     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
15710     return true;
15711   }
15712   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
15713       STType.isInteger() && STMemType.isInteger()) {
15714     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
15715     return true;
15716   }
15717   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
15718     Val = DAG.getBitcast(STMemType, Val);
15719     return true;
15720   }
15721   return false; // fail.
15722 }
15723 
15724 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
15725   EVT LDMemType = LD->getMemoryVT();
15726   EVT LDType = LD->getValueType(0);
15727   assert(Val.getValueType() == LDMemType &&
15728          "Attempting to extend value of non-matching type");
15729   if (LDType == LDMemType)
15730     return true;
15731   if (LDMemType.isInteger() && LDType.isInteger()) {
15732     switch (LD->getExtensionType()) {
15733     case ISD::NON_EXTLOAD:
15734       Val = DAG.getBitcast(LDType, Val);
15735       return true;
15736     case ISD::EXTLOAD:
15737       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
15738       return true;
15739     case ISD::SEXTLOAD:
15740       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
15741       return true;
15742     case ISD::ZEXTLOAD:
15743       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
15744       return true;
15745     }
15746   }
15747   return false;
15748 }
15749 
15750 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
15751   if (OptLevel == CodeGenOpt::None || !LD->isSimple())
15752     return SDValue();
15753   SDValue Chain = LD->getOperand(0);
15754   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
15755   // TODO: Relax this restriction for unordered atomics (see D66309)
15756   if (!ST || !ST->isSimple())
15757     return SDValue();
15758 
15759   EVT LDType = LD->getValueType(0);
15760   EVT LDMemType = LD->getMemoryVT();
15761   EVT STMemType = ST->getMemoryVT();
15762   EVT STType = ST->getValue().getValueType();
15763 
15764   // There are two cases to consider here:
15765   //  1. The store is fixed width and the load is scalable. In this case we
15766   //     don't know at compile time if the store completely envelops the load
15767   //     so we abandon the optimisation.
15768   //  2. The store is scalable and the load is fixed width. We could
15769   //     potentially support a limited number of cases here, but there has been
15770   //     no cost-benefit analysis to prove it's worth it.
15771   bool LdStScalable = LDMemType.isScalableVector();
15772   if (LdStScalable != STMemType.isScalableVector())
15773     return SDValue();
15774 
15775   // If we are dealing with scalable vectors on a big endian platform the
15776   // calculation of offsets below becomes trickier, since we do not know at
15777   // compile time the absolute size of the vector. Until we've done more
15778   // analysis on big-endian platforms it seems better to bail out for now.
15779   if (LdStScalable && DAG.getDataLayout().isBigEndian())
15780     return SDValue();
15781 
15782   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
15783   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
15784   int64_t Offset;
15785   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
15786     return SDValue();
15787 
  // Normalize for endianness. After this, Offset=0 will denote that the least
  // significant bit in the loaded value maps to the least significant bit in
  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
  // n-th least significant byte of the stored value.
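  //
  // For example, with a 4-byte store and a 1-byte load on a big-endian
  // target, a load at byte Offset = 3 reads the least significant byte of
  // the stored value, so the adjustment below yields (4 - 1) - 3 = 0.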
15792   if (DAG.getDataLayout().isBigEndian())
15793     Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
15794               (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
15795                  8 -
15796              Offset;
15797 
  // Check that the stored value covers all bits that are loaded.
15799   bool STCoversLD;
15800 
15801   TypeSize LdMemSize = LDMemType.getSizeInBits();
15802   TypeSize StMemSize = STMemType.getSizeInBits();
15803   if (LdStScalable)
15804     STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
15805   else
15806     STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
15807                                    StMemSize.getFixedSize());
15808 
15809   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
15810     if (LD->isIndexed()) {
15811       // Cannot handle opaque target constants and we must respect the user's
15812       // request not to split indexes from loads.
15813       if (!canSplitIdx(LD))
15814         return SDValue();
15815       SDValue Idx = SplitIndexingFromLoad(LD);
15816       SDValue Ops[] = {Val, Idx, Chain};
15817       return CombineTo(LD, Ops, 3);
15818     }
15819     return CombineTo(LD, Val, Chain);
15820   };
15821 
15822   if (!STCoversLD)
15823     return SDValue();
15824 
15825   // Memory as copy space (potentially masked).
15826   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
15827     // Simple case: Direct non-truncating forwarding
15828     if (LDType.getSizeInBits() == LdMemSize)
15829       return ReplaceLd(LD, ST->getValue(), Chain);
15830     // Can we model the truncate and extension with an and mask?
15831     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
15832         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
15833       // Mask to size of LDMemType
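      // (e.g. forwarding an i32 value stored through an i16 slot to a
      // zext-loading load becomes (and storedVal, 0xFFFF)).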
15834       auto Mask =
15835           DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
15836                                                StMemSize.getFixedSize()),
15837                           SDLoc(ST), STType);
15838       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
15839       return ReplaceLd(LD, Val, Chain);
15840     }
15841   }
15842 
15843   // TODO: Deal with nonzero offset.
15844   if (LD->getBasePtr().isUndef() || Offset != 0)
15845     return SDValue();
  // Model necessary truncations / extensions.
15847   SDValue Val;
  // Truncate Value to the stored memory size.
15849   do {
15850     if (!getTruncatedStoreValue(ST, Val))
15851       continue;
15852     if (!isTypeLegal(LDMemType))
15853       continue;
15854     if (STMemType != LDMemType) {
15855       // TODO: Support vectors? This requires extract_subvector/bitcast.
15856       if (!STMemType.isVector() && !LDMemType.isVector() &&
15857           STMemType.isInteger() && LDMemType.isInteger())
15858         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
15859       else
15860         continue;
15861     }
15862     if (!extendLoadedValueToExtension(LD, Val))
15863       continue;
15864     return ReplaceLd(LD, Val, Chain);
15865   } while (false);
15866 
15867   // On failure, cleanup dead nodes we may have created.
15868   if (Val->use_empty())
15869     deleteAndRecombine(Val.getNode());
15870   return SDValue();
15871 }
15872 
15873 SDValue DAGCombiner::visitLOAD(SDNode *N) {
15874   LoadSDNode *LD  = cast<LoadSDNode>(N);
15875   SDValue Chain = LD->getChain();
15876   SDValue Ptr   = LD->getBasePtr();
15877 
15878   // If load is not volatile and there are no uses of the loaded value (and
15879   // the updated indexed value in case of indexed loads), change uses of the
15880   // chain value into uses of the chain input (i.e. delete the dead load).
15881   // TODO: Allow this for unordered atomics (see D66309)
15882   if (LD->isSimple()) {
15883     if (N->getValueType(1) == MVT::Other) {
15884       // Unindexed loads.
15885       if (!N->hasAnyUseOfValue(0)) {
15886         // It's not safe to use the two value CombineTo variant here. e.g.
15887         // v1, chain2 = load chain1, loc
15888         // v2, chain3 = load chain2, loc
15889         // v3         = add v2, c
15890         // Now we replace use of chain2 with chain1.  This makes the second load
15891         // isomorphic to the one we are deleting, and thus makes this load live.
15892         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
15893                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
15894                    dbgs() << "\n");
15895         WorklistRemover DeadNodes(*this);
15896         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15897         AddUsersToWorklist(Chain.getNode());
15898         if (N->use_empty())
15899           deleteAndRecombine(N);
15900 
15901         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
15902       }
15903     } else {
15904       // Indexed loads.
15905       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
15906 
15907       // If this load has an opaque TargetConstant offset, then we cannot split
15908       // the indexing into an add/sub directly (that TargetConstant may not be
15909       // valid for a different type of node, and we cannot convert an opaque
15910       // target constant into a regular constant).
15911       bool CanSplitIdx = canSplitIdx(LD);
15912 
15913       if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
15914         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
15915         SDValue Index;
15916         if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
15917           Index = SplitIndexingFromLoad(LD);
15918           // Try to fold the base pointer arithmetic into subsequent loads and
15919           // stores.
15920           AddUsersToWorklist(N);
15921         } else
15922           Index = DAG.getUNDEF(N->getValueType(1));
15923         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
15924                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
15925                    dbgs() << " and 2 other values\n");
15926         WorklistRemover DeadNodes(*this);
15927         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
15928         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
15929         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
15930         deleteAndRecombine(N);
15931         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
15932       }
15933     }
15934   }
15935 
15936   // If this load is directly stored, replace the load value with the stored
15937   // value.
15938   if (auto V = ForwardStoreValueToDirectLoad(LD))
15939     return V;
15940 
15941   // Try to infer better alignment information than the load already has.
15942   if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
15943     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
15944       if (*Alignment > LD->getAlign() &&
15945           isAligned(*Alignment, LD->getSrcValueOffset())) {
15946         SDValue NewLoad = DAG.getExtLoad(
15947             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
15948             LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
15949             LD->getMemOperand()->getFlags(), LD->getAAInfo());
15950         // NewLoad will always be N as we are only refining the alignment
15951         assert(NewLoad.getNode() == N);
15952         (void)NewLoad;
15953       }
15954     }
15955   }
15956 
15957   if (LD->isUnindexed()) {
15958     // Walk up chain skipping non-aliasing memory nodes.
15959     SDValue BetterChain = FindBetterChain(LD, Chain);
15960 
15961     // If there is a better chain.
15962     if (Chain != BetterChain) {
15963       SDValue ReplLoad;
15964 
      // Replace the chain to avoid dependency.
15966       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
15967         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
15968                                BetterChain, Ptr, LD->getMemOperand());
15969       } else {
15970         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
15971                                   LD->getValueType(0),
15972                                   BetterChain, Ptr, LD->getMemoryVT(),
15973                                   LD->getMemOperand());
15974       }
15975 
15976       // Create token factor to keep old chain connected.
15977       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
15978                                   MVT::Other, Chain, ReplLoad.getValue(1));
15979 
15980       // Replace uses with load result and token factor
15981       return CombineTo(N, ReplLoad.getValue(0), Token);
15982     }
15983   }
15984 
15985   // Try transforming N to an indexed load.
15986   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15987     return SDValue(N, 0);
15988 
15989   // Try to slice up N to more direct loads if the slices are mapped to
15990   // different register banks or pairing can take place.
15991   if (SliceUpLoad(N))
15992     return SDValue(N, 0);
15993 
15994   return SDValue();
15995 }
15996 
15997 namespace {
15998 
15999 /// Helper structure used to slice a load in smaller loads.
16000 /// Basically a slice is obtained from the following sequence:
16001 /// Origin = load Ty1, Base
16002 /// Shift = srl Ty1 Origin, CstTy Amount
16003 /// Inst = trunc Shift to Ty2
16004 ///
16005 /// Then, it will be rewritten into:
16006 /// Slice = load SliceTy, Base + SliceOffset
16007 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
16008 ///
16009 /// SliceTy is deduced from the number of bits that are actually used to
16010 /// build Inst.
16011 struct LoadedSlice {
16012   /// Helper structure used to compute the cost of a slice.
16013   struct Cost {
16014     /// Are we optimizing for code size.
16015     bool ForCodeSize = false;
16016 
16017     /// Various cost.
16018     unsigned Loads = 0;
16019     unsigned Truncates = 0;
16020     unsigned CrossRegisterBanksCopies = 0;
16021     unsigned ZExts = 0;
16022     unsigned Shift = 0;
16023 
16024     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
16025 
16026     /// Get the cost of one isolated slice.
16027     Cost(const LoadedSlice &LS, bool ForCodeSize)
16028         : ForCodeSize(ForCodeSize), Loads(1) {
16029       EVT TruncType = LS.Inst->getValueType(0);
16030       EVT LoadedType = LS.getLoadedType();
16031       if (TruncType != LoadedType &&
16032           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
16033         ZExts = 1;
16034     }
16035 
16036     /// Account for slicing gain in the current cost.
    /// Slicing provides a few gains, like removing a shift or a
    /// truncate. This method allows growing the cost of the original
    /// load with the gain from this slice.
16040     void addSliceGain(const LoadedSlice &LS) {
16041       // Each slice saves a truncate.
16042       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
16043       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
16044                               LS.Inst->getValueType(0)))
16045         ++Truncates;
16046       // If there is a shift amount, this slice gets rid of it.
16047       if (LS.Shift)
16048         ++Shift;
16049       // If this slice can merge a cross register bank copy, account for it.
16050       if (LS.canMergeExpensiveCrossRegisterBankCopy())
16051         ++CrossRegisterBanksCopies;
16052     }
16053 
16054     Cost &operator+=(const Cost &RHS) {
16055       Loads += RHS.Loads;
16056       Truncates += RHS.Truncates;
16057       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
16058       ZExts += RHS.ZExts;
16059       Shift += RHS.Shift;
16060       return *this;
16061     }
16062 
16063     bool operator==(const Cost &RHS) const {
16064       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
16065              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
16066              ZExts == RHS.ZExts && Shift == RHS.Shift;
16067     }
16068 
16069     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
16070 
16071     bool operator<(const Cost &RHS) const {
      // Assume cross-register-bank copies are as expensive as loads.
16073       // FIXME: Do we want some more target hooks?
16074       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
16075       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
16076       // Unless we are optimizing for code size, consider the
16077       // expensive operation first.
16078       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
16079         return ExpensiveOpsLHS < ExpensiveOpsRHS;
16080       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
16081              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
16082     }
16083 
16084     bool operator>(const Cost &RHS) const { return RHS < *this; }
16085 
16086     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
16087 
16088     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
16089   };
16090 
  // The last instruction that represents the slice. This should be a
16092   // truncate instruction.
16093   SDNode *Inst;
16094 
16095   // The original load instruction.
16096   LoadSDNode *Origin;
16097 
16098   // The right shift amount in bits from the original load.
16099   unsigned Shift;
16100 
  // The DAG from which Origin comes.
16102   // This is used to get some contextual information about legal types, etc.
16103   SelectionDAG *DAG;
16104 
16105   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
16106               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
16107       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
16108 
16109   /// Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth bits wide, with used bits set to 1 and
  ///         unused bits set to 0.
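  /// e.g. a 16-bit slice shifted by 32 within a 64-bit load yields
  /// 0x0000FFFF00000000.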
16112   APInt getUsedBits() const {
16113     // Reproduce the trunc(lshr) sequence:
16114     // - Start from the truncated value.
16115     // - Zero extend to the desired bit width.
16116     // - Shift left.
16117     assert(Origin && "No original load to compare against.");
16118     unsigned BitWidth = Origin->getValueSizeInBits(0);
16119     assert(Inst && "This slice is not bound to an instruction");
16120     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
16121            "Extracted slice is bigger than the whole type!");
16122     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
16123     UsedBits.setAllBits();
16124     UsedBits = UsedBits.zext(BitWidth);
16125     UsedBits <<= Shift;
16126     return UsedBits;
16127   }
16128 
16129   /// Get the size of the slice to be loaded in bytes.
16130   unsigned getLoadedSize() const {
16131     unsigned SliceSize = getUsedBits().countPopulation();
16132     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
16133     return SliceSize / 8;
16134   }
16135 
16136   /// Get the type that will be loaded for this slice.
16137   /// Note: This may not be the final type for the slice.
16138   EVT getLoadedType() const {
16139     assert(DAG && "Missing context");
16140     LLVMContext &Ctxt = *DAG->getContext();
16141     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
16142   }
16143 
16144   /// Get the alignment of the load used for this slice.
16145   Align getAlign() const {
16146     Align Alignment = Origin->getAlign();
16147     uint64_t Offset = getOffsetFromBase();
16148     if (Offset != 0)
16149       Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
16150     return Alignment;
16151   }
16152 
16153   /// Check if this slice can be rewritten with legal operations.
16154   bool isLegal() const {
16155     // An invalid slice is not legal.
16156     if (!Origin || !Inst || !DAG)
16157       return false;
16158 
    // Offsets are only used by indexed loads; we do not handle those.
16160     if (!Origin->getOffset().isUndef())
16161       return false;
16162 
16163     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16164 
16165     // Check that the type is legal.
16166     EVT SliceType = getLoadedType();
16167     if (!TLI.isTypeLegal(SliceType))
16168       return false;
16169 
16170     // Check that the load is legal for this type.
16171     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
16172       return false;
16173 
16174     // Check that the offset can be computed.
16175     // 1. Check its type.
16176     EVT PtrType = Origin->getBasePtr().getValueType();
16177     if (PtrType == MVT::Untyped || PtrType.isExtended())
16178       return false;
16179 
16180     // 2. Check that it fits in the immediate.
16181     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
16182       return false;
16183 
16184     // 3. Check that the computation is legal.
16185     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
16186       return false;
16187 
16188     // Check that the zext is legal if it needs one.
16189     EVT TruncateType = Inst->getValueType(0);
16190     if (TruncateType != SliceType &&
16191         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
16192       return false;
16193 
16194     return true;
16195   }
16196 
16197   /// Get the offset in bytes of this slice in the original chunk of
16198   /// bits.
16199   /// \pre DAG != nullptr.
16200   uint64_t getOffsetFromBase() const {
16201     assert(DAG && "Missing context.");
16202     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) &&
           "Shifts not aligned on byte boundaries are not supported.");
16204     uint64_t Offset = Shift / 8;
16205     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
16206     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
16207            "The size of the original loaded type is not a multiple of a"
16208            " byte.");
16209     // If Offset is bigger than TySizeInBytes, it means we are loading all
16210     // zeros. This should have been optimized before in the process.
16211     assert(TySizeInBytes > Offset &&
16212            "Invalid shift amount for given loaded size");
16213     if (IsBigEndian)
16214       Offset = TySizeInBytes - Offset - getLoadedSize();
16215     return Offset;
16216   }
16217 
16218   /// Generate the sequence of instructions to load the slice
16219   /// represented by this object and redirect the uses of this slice to
16220   /// this new sequence of instructions.
16221   /// \pre this->Inst && this->Origin are valid Instructions and this
16222   /// object passed the legal check: LoadedSlice::isLegal returned true.
16223   /// \return The last instruction of the sequence used to load the slice.
16224   SDValue loadSlice() const {
16225     assert(Inst && Origin && "Unable to replace a non-existing slice.");
16226     const SDValue &OldBaseAddr = Origin->getBasePtr();
16227     SDValue BaseAddr = OldBaseAddr;
16228     // Get the offset in that chunk of bytes w.r.t. the endianness.
16229     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
16230     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
16231     if (Offset) {
16232       // BaseAddr = BaseAddr + Offset.
16233       EVT ArithType = BaseAddr.getValueType();
16234       SDLoc DL(Origin);
16235       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
16236                               DAG->getConstant(Offset, DL, ArithType));
16237     }
16238 
16239     // Create the type of the loaded slice according to its size.
16240     EVT SliceType = getLoadedType();
16241 
16242     // Create the load for the slice.
16243     SDValue LastInst =
16244         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
16245                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
16246                      Origin->getMemOperand()->getFlags());
16247     // If the final type is not the same as the loaded type, this means that
16248     // we have to pad with zero. Create a zero extend for that.
16249     EVT FinalType = Inst->getValueType(0);
16250     if (SliceType != FinalType)
16251       LastInst =
16252           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
16253     return LastInst;
16254   }
16255 
16256   /// Check if this slice can be merged with an expensive cross register
16257   /// bank copy. E.g.,
16258   /// i = load i32
16259   /// f = bitcast i32 i to float
16260   bool canMergeExpensiveCrossRegisterBankCopy() const {
16261     if (!Inst || !Inst->hasOneUse())
16262       return false;
16263     SDNode *Use = *Inst->use_begin();
16264     if (Use->getOpcode() != ISD::BITCAST)
16265       return false;
16266     assert(DAG && "Missing context");
16267     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16268     EVT ResVT = Use->getValueType(0);
16269     const TargetRegisterClass *ResRC =
16270         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
16271     const TargetRegisterClass *ArgRC =
16272         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
16273                            Use->getOperand(0)->isDivergent());
16274     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
16275       return false;
16276 
16277     // At this point, we know that we perform a cross-register-bank copy.
16278     // Check if it is expensive.
16279     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
16280     // Assume bitcasts are cheap, unless both register classes do not
16281     // explicitly share a common sub class.
16282     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
16283       return false;
16284 
16285     // Check if it will be merged with the load.
16286     // 1. Check the alignment / fast memory access constraint.
16287     bool IsFast = false;
16288     if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
16289                                 Origin->getAddressSpace(), getAlign(),
16290                                 Origin->getMemOperand()->getFlags(), &IsFast) ||
16291         !IsFast)
16292       return false;
16293 
16294     // 2. Check that the load is a legal operation for that type.
16295     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
16296       return false;
16297 
16298     // 3. Check that we do not have a zext in the way.
16299     if (Inst->getValueType(0) != getLoadedType())
16300       return false;
16301 
16302     return true;
16303   }
16304 };
16305 
16306 } // end anonymous namespace
16307 
16308 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
16309 /// \p UsedBits looks like 0..0 1..1 0..0.
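/// e.g. 0x00FF0000 is dense, whereas 0x00FF00FF is not.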
16310 static bool areUsedBitsDense(const APInt &UsedBits) {
16311   // If all the bits are one, this is dense!
16312   if (UsedBits.isAllOnes())
16313     return true;
16314 
16315   // Get rid of the unused bits on the right.
16316   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16317   // Get rid of the unused bits on the left.
16318   if (NarrowedUsedBits.countLeadingZeros())
16319     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16320   // Check that the chunk of bits is completely used.
16321   return NarrowedUsedBits.isAllOnes();
16322 }
16323 
16324 /// Check whether or not \p First and \p Second are next to each other
16325 /// in memory. This means that there is no hole between the bits loaded
16326 /// by \p First and the bits loaded by \p Second.
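/// e.g. slices using bits 0x00FF and 0xFF00 of an i16 load are next to each
/// other, since together they use bits 0xFFFF.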
16327 static bool areSlicesNextToEachOther(const LoadedSlice &First,
16328                                      const LoadedSlice &Second) {
16329   assert(First.Origin == Second.Origin && First.Origin &&
16330          "Unable to match different memory origins.");
16331   APInt UsedBits = First.getUsedBits();
16332   assert((UsedBits & Second.getUsedBits()) == 0 &&
16333          "Slices are not supposed to overlap.");
16334   UsedBits |= Second.getUsedBits();
16335   return areUsedBitsDense(UsedBits);
16336 }
16337 
16338 /// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
16340 /// \pre \p GlobalLSCost should account for at least as many loads as
16341 /// there is in the slices in \p LoadedSlices.
16342 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16343                                  LoadedSlice::Cost &GlobalLSCost) {
16344   unsigned NumberOfSlices = LoadedSlices.size();
  // If there are fewer than 2 elements, no pairing is possible.
16346   if (NumberOfSlices < 2)
16347     return;
16348 
16349   // Sort the slices so that elements that are likely to be next to each
16350   // other in memory are next to each other in the list.
16351   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
16352     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
16353     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
16354   });
16355   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. second) candidate to be
  // placed in a paired load.
16358   const LoadedSlice *First = nullptr;
16359   const LoadedSlice *Second = nullptr;
16360   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
16361                 // Set the beginning of the pair.
16362                                                            First = Second) {
16363     Second = &LoadedSlices[CurrSlice];
16364 
16365     // If First is NULL, it means we start a new pair.
16366     // Get to the next slice.
16367     if (!First)
16368       continue;
16369 
16370     EVT LoadedType = First->getLoadedType();
16371 
16372     // If the types of the slices are different, we cannot pair them.
16373     if (LoadedType != Second->getLoadedType())
16374       continue;
16375 
16376     // Check if the target supplies paired loads for this type.
16377     Align RequiredAlignment;
16378     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair; this type is hopeless.
16380       Second = nullptr;
16381       continue;
16382     }
16383     // Check if we meet the alignment requirement.
16384     if (First->getAlign() < RequiredAlignment)
16385       continue;
16386 
16387     // Check that both loads are next to each other in memory.
16388     if (!areSlicesNextToEachOther(*First, *Second))
16389       continue;
16390 
16391     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
16392     --GlobalLSCost.Loads;
16393     // Move to the next pair.
16394     Second = nullptr;
16395   }
16396 }
16397 
16398 /// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there are exactly two
16400 /// involved slices (1) which are (2) next to each other in memory, and
16401 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
16402 ///
16403 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
16404 /// the elements themselves.
16405 ///
/// FIXME: When the cost model is mature enough, we can relax
16407 /// constraints (1) and (2).
16408 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16409                                 const APInt &UsedBits, bool ForCodeSize) {
16410   unsigned NumberOfSlices = LoadedSlices.size();
16411   if (StressLoadSlicing)
16412     return NumberOfSlices > 1;
16413 
16414   // Check (1).
16415   if (NumberOfSlices != 2)
16416     return false;
16417 
16418   // Check (2).
16419   if (!areUsedBitsDense(UsedBits))
16420     return false;
16421 
16422   // Check (3).
16423   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
16424   // The original code has one big load.
16425   OrigCost.Loads = 1;
16426   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
16427     const LoadedSlice &LS = LoadedSlices[CurrSlice];
16428     // Accumulate the cost of all the slices.
16429     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
16430     GlobalSlicingCost += SliceCost;
16431 
    // Account for the gain obtained with the current slices as a cost in
    // the original configuration.
16434     OrigCost.addSliceGain(LS);
16435   }
16436 
16437   // If the target supports paired load, adjust the cost accordingly.
16438   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
16439   return OrigCost > GlobalSlicingCost;
16440 }
16441 
/// If the given load, \p N, is used only by trunc or trunc(lshr)
16443 /// operations, split it in the various pieces being extracted.
16444 ///
16445 /// This sort of thing is introduced by SROA.
16446 /// This slicing takes care not to insert overlapping loads.
/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
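/// e.g. (trunc (load i64 p)) and (trunc (srl (load i64 p), 32)) may become
/// two independent i32 loads, from p and from p+4 (little endian).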
16448 bool DAGCombiner::SliceUpLoad(SDNode *N) {
16449   if (Level < AfterLegalizeDAG)
16450     return false;
16451 
16452   LoadSDNode *LD = cast<LoadSDNode>(N);
16453   if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
16454       !LD->getValueType(0).isInteger())
16455     return false;
16456 
16457   // The algorithm to split up a load of a scalable vector into individual
16458   // elements currently requires knowing the length of the loaded type,
16459   // so will need adjusting to work on scalable vectors.
16460   if (LD->getValueType(0).isScalableVector())
16461     return false;
16462 
16463   // Keep track of already used bits to detect overlapping values.
16464   // In that case, we will just abort the transformation.
16465   APInt UsedBits(LD->getValueSizeInBits(0), 0);
16466 
16467   SmallVector<LoadedSlice, 4> LoadedSlices;
16468 
16469   // Check if this load is used as several smaller chunks of bits.
16470   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
16471   // of computation for each trunc.
16472   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
16473        UI != UIEnd; ++UI) {
16474     // Skip the uses of the chain.
16475     if (UI.getUse().getResNo() != 0)
16476       continue;
16477 
16478     SDNode *User = *UI;
16479     unsigned Shift = 0;
16480 
16481     // Check if this is a trunc(lshr).
16482     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
16483         isa<ConstantSDNode>(User->getOperand(1))) {
16484       Shift = User->getConstantOperandVal(1);
16485       User = *User->use_begin();
16486     }
16487 
    // At this point, User is a truncate iff we encountered trunc or
    // trunc(lshr).
16490     if (User->getOpcode() != ISD::TRUNCATE)
16491       return false;
16492 
    // The width of the type must be a power of 2 and at least 8 bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if the shift is not a multiple of 8 bits, the slice
    // would straddle byte boundaries, which we do not support.
16497     unsigned Width = User->getValueSizeInBits(0);
16498     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
16499       return false;
16500 
16501     // Build the slice for this chain of computations.
16502     LoadedSlice LS(User, LD, Shift, &DAG);
16503     APInt CurrentUsedBits = LS.getUsedBits();
16504 
16505     // Check if this slice overlaps with another.
16506     if ((CurrentUsedBits & UsedBits) != 0)
16507       return false;
16508     // Update the bits used globally.
16509     UsedBits |= CurrentUsedBits;
16510 
16511     // Check if the new slice would be legal.
16512     if (!LS.isLegal())
16513       return false;
16514 
16515     // Record the slice.
16516     LoadedSlices.push_back(LS);
16517   }
16518 
16519   // Abort slicing if it does not seem to be profitable.
16520   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
16521     return false;
16522 
16523   ++SlicedLoads;
16524 
16525   // Rewrite each chain to use an independent load.
16526   // By construction, each chain can be represented by a unique load.
16527 
16528   // Prepare the argument for the new token factor for all the slices.
16529   SmallVector<SDValue, 8> ArgChains;
16530   for (const LoadedSlice &LS : LoadedSlices) {
16531     SDValue SliceInst = LS.loadSlice();
16532     CombineTo(LS.Inst, SliceInst, true);
16533     if (SliceInst.getOpcode() != ISD::LOAD)
16534       SliceInst = SliceInst.getOperand(0);
16535     assert(SliceInst->getOpcode() == ISD::LOAD &&
16536            "It takes more than a zext to get to the loaded slice!!");
16537     ArgChains.push_back(SliceInst.getValue(1));
16538   }
16539 
16540   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
16541                               ArgChains);
16542   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16543   AddToWorklist(Chain.getNode());
16544   return true;
16545 }
16546 
16547 /// Check to see if V is (and load (ptr), imm), where the load is having
16548 /// specific bytes cleared out.  If so, return the byte size being masked out
16549 /// and the shift amount.
16550 static std::pair<unsigned, unsigned>
16551 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
16552   std::pair<unsigned, unsigned> Result(0, 0);
16553 
16554   // Check for the structure we're looking for.
16555   if (V->getOpcode() != ISD::AND ||
16556       !isa<ConstantSDNode>(V->getOperand(1)) ||
16557       !ISD::isNormalLoad(V->getOperand(0).getNode()))
16558     return Result;
16559 
16560   // Check the chain and pointer.
16561   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16562   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
16563 
16564   // This only handles simple types.
16565   if (V.getValueType() != MVT::i16 &&
16566       V.getValueType() != MVT::i32 &&
16567       V.getValueType() != MVT::i64)
16568     return Result;
16569 
16570   // Check the constant mask.  Invert it so that the bits being masked out are
16571   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
16572   // follow the sign bit for uniformity.
16573   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16574   unsigned NotMaskLZ = countLeadingZeros(NotMask);
16575   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
16576   unsigned NotMaskTZ = countTrailingZeros(NotMask);
16577   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
16578   if (NotMaskLZ == 64) return Result;  // All zero mask.
16579 
16580   // See if we have a continuous run of bits.  If so, we have 0*1+0*
16581   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
16582     return Result;
16583 
16584   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16585   if (V.getValueType() != MVT::i64 && NotMaskLZ)
16586     NotMaskLZ -= 64-V.getValueSizeInBits();
16587 
16588   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16589   switch (MaskedBytes) {
16590   case 1:
16591   case 2:
16592   case 4: break;
16593   default: return Result; // All one mask, or 5-byte mask.
16594   }
16595 
  // Verify that the run of masked bytes starts at a multiple of its size, so
  // that the narrowed access is aligned the same as its width.
16598   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16599 
  // For narrowing to be valid, it must be the case that the load is the
  // immediately preceding memory operation before the store.
16602   if (LD == Chain.getNode())
16603     ; // ok.
16604   else if (Chain->getOpcode() == ISD::TokenFactor &&
16605            SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use so there are no indirect dependencies.
16607     if (!LD->isOperandOf(Chain.getNode()))
16608       return Result;
16609   } else
16610     return Result; // Fail.
16611 
16612   Result.first = MaskedBytes;
16613   Result.second = NotMaskTZ/8;
16614   return Result;
16615 }
16616 
16617 /// Check to see if IVal is something that provides a value as specified by
16618 /// MaskInfo. If so, replace the specified store with a narrower store of
16619 /// truncated IVal.
16620 static SDValue
16621 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
16622                                 SDValue IVal, StoreSDNode *St,
16623                                 DAGCombiner *DC) {
16624   unsigned NumBytes = MaskInfo.first;
16625   unsigned ByteShift = MaskInfo.second;
16626   SelectionDAG &DAG = DC->getDAG();
16627 
16628   // Check to see if IVal is all zeros in the part being masked in by the 'or'
16629   // that uses this.  If not, this is not a replacement.
16630   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
16631                                   ByteShift*8, (ByteShift+NumBytes)*8);
16632   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
16633 
16634   // Check that it is legal on the target to do this.  It is legal if the new
16635   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
16636   // legalization (and the target doesn't explicitly think this is a bad idea).
16637   MVT VT = MVT::getIntegerVT(NumBytes * 8);
16638   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16639   if (!DC->isTypeLegal(VT))
16640     return SDValue();
16641   if (St->getMemOperand() &&
16642       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16643                               *St->getMemOperand()))
16644     return SDValue();
16645 
16646   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
16647   // shifted by ByteShift and truncated down to NumBytes.
16648   if (ByteShift) {
16649     SDLoc DL(IVal);
16650     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
16651                        DAG.getConstant(ByteShift*8, DL,
16652                                     DC->getShiftAmountTy(IVal.getValueType())));
16653   }
16654 
16655   // Figure out the offset for the store and the alignment of the access.
16656   unsigned StOffset;
16657   if (DAG.getDataLayout().isLittleEndian())
16658     StOffset = ByteShift;
16659   else
16660     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
16661 
16662   SDValue Ptr = St->getBasePtr();
16663   if (StOffset) {
16664     SDLoc DL(IVal);
16665     Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
16666   }
16667 
16668   // Truncate down to the new size.
16669   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
16670 
16671   ++OpsNarrowed;
16672   return DAG
16673       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
16674                 St->getPointerInfo().getWithOffset(StOffset),
16675                 St->getOriginalAlign());
16676 }
16677 
16678 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
16679 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
16680 /// narrowing the load and store if it would end up being a win for performance
16681 /// or code size.
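/// e.g. (store (or (load i32 p), 0x5500), p) may be narrowed to an i8
/// load/or/store of 0x55 at p+1 (little endian), leaving the wide load dead.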
16682 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
16683   StoreSDNode *ST  = cast<StoreSDNode>(N);
16684   if (!ST->isSimple())
16685     return SDValue();
16686 
16687   SDValue Chain = ST->getChain();
16688   SDValue Value = ST->getValue();
16689   SDValue Ptr   = ST->getBasePtr();
16690   EVT VT = Value.getValueType();
16691 
16692   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
16693     return SDValue();
16694 
16695   unsigned Opc = Value.getOpcode();
16696 
16697   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
16698   // is a byte mask indicating a consecutive number of bytes, check to see if
16699   // Y is known to provide just those bytes.  If so, we try to replace the
16700   // load + replace + store sequence with a single (narrower) store, which makes
16701   // the load dead.
16702   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
16703     std::pair<unsigned, unsigned> MaskedLoad;
16704     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
16705     if (MaskedLoad.first)
16706       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16707                                                   Value.getOperand(1), ST,this))
16708         return NewST;
16709 
16710     // Or is commutative, so try swapping X and Y.
16711     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
16712     if (MaskedLoad.first)
16713       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16714                                                   Value.getOperand(0), ST,this))
16715         return NewST;
16716   }
16717 
16718   if (!EnableReduceLoadOpStoreWidth)
16719     return SDValue();
16720 
16721   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
16722       Value.getOperand(1).getOpcode() != ISD::Constant)
16723     return SDValue();
16724 
16725   SDValue N0 = Value.getOperand(0);
16726   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16727       Chain == SDValue(N0.getNode(), 1)) {
16728     LoadSDNode *LD = cast<LoadSDNode>(N0);
16729     if (LD->getBasePtr() != Ptr ||
16730         LD->getPointerInfo().getAddrSpace() !=
16731         ST->getPointerInfo().getAddrSpace())
16732       return SDValue();
16733 
    // Find the type to narrow the load / op / store to.
16735     SDValue N1 = Value.getOperand(1);
16736     unsigned BitWidth = N1.getValueSizeInBits();
16737     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
16738     if (Opc == ISD::AND)
16739       Imm ^= APInt::getAllOnes(BitWidth);
16740     if (Imm == 0 || Imm.isAllOnes())
16741       return SDValue();
16742     unsigned ShAmt = Imm.countTrailingZeros();
16743     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
16744     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
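    // For example (illustrative): BitWidth = 32 and Imm = 0x00FF0000 give
    // ShAmt = 16, MSB = 23, and NewBW = NextPowerOf2(23 - 16) = 8, i.e. an
    // i8 candidate covering just the changed byte.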
16745     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16746     // The narrowing should be profitable, the load/store operation should be
16747     // legal (or custom) and the store size should be equal to the NewVT width.
16748     while (NewBW < BitWidth &&
16749            (NewVT.getStoreSizeInBits() != NewBW ||
16750             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
16751             !TLI.isNarrowingProfitable(VT, NewVT))) {
16752       NewBW = NextPowerOf2(NewBW);
16753       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16754     }
16755     if (NewBW >= BitWidth)
16756       return SDValue();
16757 
    // If the lowest changed bit does not start at a type bitwidth boundary,
    // round ShAmt down to the previous boundary (e.g., ShAmt 12 with NewBW 8
    // becomes 8).
16760     if (ShAmt % NewBW)
16761       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
16762     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
16763                                    std::min(BitWidth, ShAmt + NewBW));
16764     if ((Imm & Mask) == Imm) {
16765       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
16766       if (Opc == ISD::AND)
16767         NewImm ^= APInt::getAllOnes(NewBW);
16768       uint64_t PtrOff = ShAmt / 8;
16769       // For big endian targets, we need to adjust the offset to the pointer to
16770       // load the correct bytes.
16771       if (DAG.getDataLayout().isBigEndian())
16772         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
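      // For example (illustrative): with BitWidth = 32, NewBW = 8 and
      // ShAmt = 16, PtrOff is 2 on little-endian and
      // (32 + 7 - 8) / 8 - 2 = 1 on big-endian.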
16773 
16774       bool IsFast = false;
16775       Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
16776       if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
16777                                   LD->getAddressSpace(), NewAlign,
16778                                   LD->getMemOperand()->getFlags(), &IsFast) ||
16779           !IsFast)
16780         return SDValue();
16781 
16782       SDValue NewPtr =
16783           DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
16784       SDValue NewLD =
16785           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
16786                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
16787                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
16788       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
16789                                    DAG.getConstant(NewImm, SDLoc(Value),
16790                                                    NewVT));
16791       SDValue NewST =
16792           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
16793                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
16794 
16795       AddToWorklist(NewPtr.getNode());
16796       AddToWorklist(NewLD.getNode());
16797       AddToWorklist(NewVal.getNode());
16798       WorklistRemover DeadNodes(*this);
16799       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
16800       ++OpsNarrowed;
16801       return NewST;
16802     }
16803   }
16804 
16805   return SDValue();
16806 }
16807 
16808 /// For a given floating point load / store pair, if the load value isn't used
16809 /// by any other operations, then consider transforming the pair to integer
16810 /// load / store operations if the target deems the transformation profitable.
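/// For example (illustrative), when the target finds it profitable:
///   (store (f32 (load p)), q) -> (store (i32 (load p)), q)
/// which keeps the value out of the floating-point register file.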
16811 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
16812   StoreSDNode *ST  = cast<StoreSDNode>(N);
16813   SDValue Value = ST->getValue();
16814   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
16815       Value.hasOneUse()) {
16816     LoadSDNode *LD = cast<LoadSDNode>(Value);
16817     EVT VT = LD->getMemoryVT();
16818     if (!VT.isFloatingPoint() ||
16819         VT != ST->getMemoryVT() ||
16820         LD->isNonTemporal() ||
16821         ST->isNonTemporal() ||
16822         LD->getPointerInfo().getAddrSpace() != 0 ||
16823         ST->getPointerInfo().getAddrSpace() != 0)
16824       return SDValue();
16825 
16826     TypeSize VTSize = VT.getSizeInBits();
16827 
16828     // We don't know the size of scalable types at compile time so we cannot
16829     // create an integer of the equivalent size.
16830     if (VTSize.isScalable())
16831       return SDValue();
16832 
16833     bool FastLD = false, FastST = false;
16834     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
16835     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
16836         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
16837         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
16838         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
16839         !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
16840                                 *LD->getMemOperand(), &FastLD) ||
16841         !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
16842                                 *ST->getMemOperand(), &FastST) ||
16843         !FastLD || !FastST)
16844       return SDValue();
16845 
16846     SDValue NewLD =
16847         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
16848                     LD->getPointerInfo(), LD->getAlign());
16849 
16850     SDValue NewST =
16851         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
16852                      ST->getPointerInfo(), ST->getAlign());
16853 
16854     AddToWorklist(NewLD.getNode());
16855     AddToWorklist(NewST.getNode());
16856     WorklistRemover DeadNodes(*this);
16857     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
16858     ++LdStFP2Int;
16859     return NewST;
16860   }
16861 
16862   return SDValue();
16863 }
16864 
16865 // This is a helper function for visitMUL to check the profitability
16866 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
16867 // MulNode is the original multiply, AddNode is (add x, c1),
16868 // and ConstNode is c2.
16869 //
16870 // If the (add x, c1) has multiple uses, we could increase
16871 // the number of adds if we make this transformation.
16872 // It would only be worth doing this if we can remove a
16873 // multiply in the process. Check for that here.
16874 // To illustrate:
16875 //     (A + c1) * c3
16876 //     (A + c2) * c3
16877 // We're checking for cases where we have common "c3 * A" expressions.
16878 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
16879                                               SDValue &AddNode,
16880                                               SDValue &ConstNode) {
16881   APInt Val;
16882 
16883   // If the add only has one use, and the target thinks the folding is
16884   // profitable or does not lead to worse code, this would be OK to do.
16885   if (AddNode.getNode()->hasOneUse() &&
16886       TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
16887     return true;
16888 
16889   // Walk all the users of the constant with which we're multiplying.
16890   for (SDNode *Use : ConstNode->uses()) {
16891     if (Use == MulNode) // This use is the one we're on right now. Skip it.
16892       continue;
16893 
16894     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
16895       SDNode *OtherOp;
16896       SDNode *MulVar = AddNode.getOperand(0).getNode();
16897 
16898       // OtherOp is what we're multiplying against the constant.
16899       if (Use->getOperand(0) == ConstNode)
16900         OtherOp = Use->getOperand(1).getNode();
16901       else
16902         OtherOp = Use->getOperand(0).getNode();
16903 
      // Check to see if the multiply is with the same operand as our "add".
16905       //
16906       //     ConstNode  = CONST
16907       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
16908       //     ...
16909       //     AddNode  = (A + c1)  <-- MulVar is A.
      //         = AddNode * ConstNode   <-- currently visiting instruction.
16911       //
16912       // If we make this transformation, we will have a common
16913       // multiply (ConstNode * A) that we can save.
16914       if (OtherOp == MulVar)
16915         return true;
16916 
16917       // Now check to see if a future expansion will give us a common
16918       // multiply.
16919       //
16920       //     ConstNode  = CONST
16921       //     AddNode    = (A + c1)
      //     ...   = AddNode * ConstNode <-- currently visiting instruction.
16923       //     ...
16924       //     OtherOp = (A + c2)
16925       //     Use     = OtherOp * ConstNode <-- visiting Use.
16926       //
      // If we make this transformation, we will have a common
      // multiply (CONST * A) after we also do the same transformation
      // to the 'Use' instruction.
16930       if (OtherOp->getOpcode() == ISD::ADD &&
16931           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
16932           OtherOp->getOperand(0).getNode() == MulVar)
16933         return true;
16934     }
16935   }
16936 
16937   // Didn't find a case where this would be profitable.
16938   return false;
16939 }
16940 
16941 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
16942                                          unsigned NumStores) {
16943   SmallVector<SDValue, 8> Chains;
16944   SmallPtrSet<const SDNode *, 8> Visited;
16945   SDLoc StoreDL(StoreNodes[0].MemNode);
16946 
16947   for (unsigned i = 0; i < NumStores; ++i) {
16948     Visited.insert(StoreNodes[i].MemNode);
16949   }
16950 
  // Don't add a chain that is itself one of the candidate stores or a
  // duplicate of one already added.
16952   for (unsigned i = 0; i < NumStores; ++i) {
16953     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
16954       Chains.push_back(StoreNodes[i].MemNode->getChain());
16955   }
16956 
  assert(!Chains.empty() && "Store nodes should have at least one chain");
16958   return DAG.getTokenFactor(StoreDL, Chains);
16959 }
16960 
16961 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
16962     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
16963     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
16964   // Make sure we have something to merge.
16965   if (NumStores < 2)
16966     return false;
16967 
16968   assert((!UseTrunc || !UseVector) &&
16969          "This optimization cannot emit a vector truncating store");
16970 
  // Take the debug location from the first (latest) store node in the chain.
16972   SDLoc DL(StoreNodes[0].MemNode);
16973 
16974   TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
16975   unsigned SizeInBits = NumStores * ElementSizeBits;
16976   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16977 
16978   Optional<MachineMemOperand::Flags> Flags;
16979   AAMDNodes AAInfo;
16980   for (unsigned I = 0; I != NumStores; ++I) {
16981     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
16982     if (!Flags) {
16983       Flags = St->getMemOperand()->getFlags();
16984       AAInfo = St->getAAInfo();
16985       continue;
16986     }
16987     // Skip merging if there's an inconsistent flag.
16988     if (Flags != St->getMemOperand()->getFlags())
16989       return false;
16990     // Concatenate AA metadata.
16991     AAInfo = AAInfo.concat(St->getAAInfo());
16992   }
16993 
16994   EVT StoreTy;
16995   if (UseVector) {
16996     unsigned Elts = NumStores * NumMemElts;
16997     // Get the type for the merged vector store.
16998     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16999   } else
17000     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
17001 
17002   SDValue StoredVal;
17003   if (UseVector) {
17004     if (IsConstantSrc) {
17005       SmallVector<SDValue, 8> BuildVector;
17006       for (unsigned I = 0; I != NumStores; ++I) {
17007         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17008         SDValue Val = St->getValue();
        // If the constant is of the wrong type, convert it now.
17010         if (MemVT != Val.getValueType()) {
17011           Val = peekThroughBitcasts(Val);
17012           // Deal with constants of wrong size.
17013           if (ElementSizeBits != Val.getValueSizeInBits()) {
17014             EVT IntMemVT =
17015                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
17016             if (isa<ConstantFPSDNode>(Val)) {
17017               // Not clear how to truncate FP values.
17018               return false;
17019             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
17020               Val = DAG.getConstant(C->getAPIntValue()
17021                                         .zextOrTrunc(Val.getValueSizeInBits())
17022                                         .zextOrTrunc(ElementSizeBits),
17023                                     SDLoc(C), IntMemVT);
17024           }
          // Bitcast the correctly sized value to the memory type.
17026           Val = DAG.getBitcast(MemVT, Val);
17027         }
17028         BuildVector.push_back(Val);
17029       }
17030       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17031                                                : ISD::BUILD_VECTOR,
17032                               DL, StoreTy, BuildVector);
17033     } else {
17034       SmallVector<SDValue, 8> Ops;
17035       for (unsigned i = 0; i < NumStores; ++i) {
17036         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17037         SDValue Val = peekThroughBitcasts(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTORS must be of
17039         // type MemVT. If the underlying value is not the correct
17040         // type, but it is an extraction of an appropriate vector we
17041         // can recast Val to be of the correct type. This may require
17042         // converting between EXTRACT_VECTOR_ELT and
17043         // EXTRACT_SUBVECTOR.
17044         if ((MemVT != Val.getValueType()) &&
17045             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
17046              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
17047           EVT MemVTScalarTy = MemVT.getScalarType();
17048           // We may need to add a bitcast here to get types to line up.
17049           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
17050             Val = DAG.getBitcast(MemVT, Val);
17051           } else {
17052             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
17053                                             : ISD::EXTRACT_VECTOR_ELT;
17054             SDValue Vec = Val.getOperand(0);
17055             SDValue Idx = Val.getOperand(1);
17056             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
17057           }
17058         }
17059         Ops.push_back(Val);
17060       }
17061 
17062       // Build the extracted vector elements back into a vector.
17063       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17064                                                : ISD::BUILD_VECTOR,
17065                               DL, StoreTy, Ops);
17066     }
17067   } else {
17068     // We should always use a vector store when merging extracted vector
17069     // elements, so this path implies a store of constants.
17070     assert(IsConstantSrc && "Merged vector elements should use vector store");
17071 
17072     APInt StoreInt(SizeInBits, 0);
17073 
17074     // Construct a single integer constant which is made of the smaller
17075     // constant inputs.
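    // For example (illustrative): merging two i16 stores of 0x1111 (lower
    // address) and 0x2222 on little-endian visits the stores in reverse and
    // yields StoreInt = 0x22221111, whose in-memory bytes 11 11 22 22 match
    // the original pair of stores.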
17076     bool IsLE = DAG.getDataLayout().isLittleEndian();
17077     for (unsigned i = 0; i < NumStores; ++i) {
17078       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
17079       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
17080 
17081       SDValue Val = St->getValue();
17082       Val = peekThroughBitcasts(Val);
17083       StoreInt <<= ElementSizeBits;
17084       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
17085         StoreInt |= C->getAPIntValue()
17086                         .zextOrTrunc(ElementSizeBits)
17087                         .zextOrTrunc(SizeInBits);
17088       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
17089         StoreInt |= C->getValueAPF()
17090                         .bitcastToAPInt()
17091                         .zextOrTrunc(ElementSizeBits)
17092                         .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary, give up for now.
17094         if (MemVT.getSizeInBits() != ElementSizeBits)
17095           return false;
17096       } else {
17097         llvm_unreachable("Invalid constant element type");
17098       }
17099     }
17100 
    // Create the merged constant to store.
17102     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
17103   }
17104 
17105   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17106   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
17107 
  // Make sure we use a truncating store if it's necessary to be legal.
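  // For example (illustrative): merging three i8 constant stores yields an
  // i24 value; on a target that promotes i24 to i32, the constant is
  // zero-extended to i32 and stored with a truncating i32 -> i24 store.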
17109   SDValue NewStore;
17110   if (!UseTrunc) {
17111     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
17112                             FirstInChain->getPointerInfo(),
17113                             FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17114   } else { // Must be realized as a trunc store
17115     EVT LegalizedStoredValTy =
17116         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
17117     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
17118     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
17119     SDValue ExtendedStoreVal =
17120         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
17121                         LegalizedStoredValTy);
17122     NewStore = DAG.getTruncStore(
17123         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
17124         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
17125         FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17126   }
17127 
17128   // Replace all merged stores with the new store.
17129   for (unsigned i = 0; i < NumStores; ++i)
17130     CombineTo(StoreNodes[i].MemNode, NewStore);
17131 
17132   AddToWorklist(NewChain.getNode());
17133   return true;
17134 }
17135 
17136 void DAGCombiner::getStoreMergeCandidates(
17137     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
17138     SDNode *&RootNode) {
17139   // This holds the base pointer, index, and the offset in bytes from the base
17140   // pointer. We must have a base and an offset. Do not handle stores to undef
17141   // base pointers.
17142   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
17143   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
17144     return;
17145 
17146   SDValue Val = peekThroughBitcasts(St->getValue());
17147   StoreSource StoreSrc = getStoreSource(Val);
17148   assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
17149 
  // If the stored value comes from a load, record the load's base pointer so
  // candidates can be matched against it.
17151   EVT MemVT = St->getMemoryVT();
17152   BaseIndexOffset LBasePtr;
17153   EVT LoadVT;
17154   if (StoreSrc == StoreSource::Load) {
17155     auto *Ld = cast<LoadSDNode>(Val);
17156     LBasePtr = BaseIndexOffset::match(Ld, DAG);
17157     LoadVT = Ld->getMemoryVT();
17158     // Load and store should be the same type.
17159     if (MemVT != LoadVT)
17160       return;
17161     // Loads must only have one use.
17162     if (!Ld->hasNUsesOfValue(1, 0))
17163       return;
17164     // The memory operands must not be volatile/indexed/atomic.
17165     // TODO: May be able to relax for unordered atomics (see D66309)
17166     if (!Ld->isSimple() || Ld->isIndexed())
17167       return;
17168   }
17169   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
17170                             int64_t &Offset) -> bool {
17171     // The memory operands must not be volatile/indexed/atomic.
17172     // TODO: May be able to relax for unordered atomics (see D66309)
17173     if (!Other->isSimple() || Other->isIndexed())
17174       return false;
17175     // Don't mix temporal stores with non-temporal stores.
17176     if (St->isNonTemporal() != Other->isNonTemporal())
17177       return false;
17178     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
17179     // Allow merging constants of different types as integers.
17180     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
17181                                            : Other->getMemoryVT() != MemVT;
17182     switch (StoreSrc) {
17183     case StoreSource::Load: {
17184       if (NoTypeMatch)
17185         return false;
17186       // The Load's Base Ptr must also match.
17187       auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
17188       if (!OtherLd)
17189         return false;
17190       BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
17191       if (LoadVT != OtherLd->getMemoryVT())
17192         return false;
17193       // Loads must only have one use.
17194       if (!OtherLd->hasNUsesOfValue(1, 0))
17195         return false;
17196       // The memory operands must not be volatile/indexed/atomic.
17197       // TODO: May be able to relax for unordered atomics (see D66309)
17198       if (!OtherLd->isSimple() || OtherLd->isIndexed())
17199         return false;
17200       // Don't mix temporal loads with non-temporal loads.
17201       if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
17202         return false;
17203       if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
17204         return false;
17205       break;
17206     }
17207     case StoreSource::Constant:
17208       if (NoTypeMatch)
17209         return false;
17210       if (!isIntOrFPConstant(OtherBC))
17211         return false;
17212       break;
17213     case StoreSource::Extract:
17214       // Do not merge truncated stores here.
17215       if (Other->isTruncatingStore())
17216         return false;
17217       if (!MemVT.bitsEq(OtherBC.getValueType()))
17218         return false;
17219       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
17220           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17221         return false;
17222       break;
17223     default:
17224       llvm_unreachable("Unhandled store source for merging");
17225     }
17226     Ptr = BaseIndexOffset::match(Other, DAG);
17227     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
17228   };
17229 
  // Check if the pair of StoreNode and RootNode has already bailed out of
  // the dependence check more times than the limit allows.
17232   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
17233                                         SDNode *RootNode) -> bool {
17234     auto RootCount = StoreRootCountMap.find(StoreNode);
17235     return RootCount != StoreRootCountMap.end() &&
17236            RootCount->second.first == RootNode &&
17237            RootCount->second.second > StoreMergeDependenceLimit;
17238   };
17239 
17240   auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
17241     // This must be a chain use.
17242     if (UseIter.getOperandNo() != 0)
17243       return;
17244     if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
17245       BaseIndexOffset Ptr;
17246       int64_t PtrDiff;
17247       if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
17248           !OverLimitInDependenceCheck(OtherStore, RootNode))
17249         StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
17250     }
17251   };
17252 
  // We are looking for a root node which is an ancestor to all mergeable
  // stores. We search up through a load, to our root, and then down
  // through all children. For instance, we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load,
  // which is always true for non-volatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
17259   //
17260   // Root
17261   // |-------|-------|
17262   // Load    Load    Store3
17263   // |       |
17264   // Store1   Store2
17265   //
17266   // FIXME: We should be able to climb and
17267   // descend TokenFactors to find candidates as well.
17268 
17269   RootNode = St->getChain().getNode();
17270 
17271   unsigned NumNodesExplored = 0;
17272   const unsigned MaxSearchNodes = 1024;
17273   if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
17274     RootNode = Ldn->getChain().getNode();
17275     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17276          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
17277       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
17278         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
17279           TryToAddCandidate(I2);
17280       }
17281     }
17282   } else {
17283     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17284          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
17285       TryToAddCandidate(I);
17286   }
17287 }
17288 
17289 // We need to check that merging these stores does not cause a loop in
17290 // the DAG. Any store candidate may depend on another candidate
17291 // indirectly through its operand (we already consider dependencies
17292 // through the chain). Check in parallel by searching up from
17293 // non-chain operands of candidates.
17294 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
17295     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
17296     SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs on the originating
  // stores from which worklist nodes come, in a similar way to
  // TokenFactor simplification.
17301 
17302   SmallPtrSet<const SDNode *, 32> Visited;
17303   SmallVector<const SDNode *, 8> Worklist;
17304 
  // RootNode is a predecessor to all candidates so we need not search
  // past it. Add RootNode (peeking through TokenFactors). Do not count
  // these towards the size check.
17308 
17309   Worklist.push_back(RootNode);
17310   while (!Worklist.empty()) {
17311     auto N = Worklist.pop_back_val();
17312     if (!Visited.insert(N).second)
17313       continue; // Already present in Visited.
17314     if (N->getOpcode() == ISD::TokenFactor) {
17315       for (SDValue Op : N->ops())
17316         Worklist.push_back(Op.getNode());
17317     }
17318   }
17319 
17320   // Don't count pruning nodes towards max.
17321   unsigned int Max = 1024 + Visited.size();
17322   // Search Ops of store candidates.
17323   for (unsigned i = 0; i < NumStores; ++i) {
17324     SDNode *N = StoreNodes[i].MemNode;
    // Of the 4 Store Operands:
    //   * Chain (Op 0) -> We have already considered these
    //                     in candidate selection and they can be
    //                     safely ignored.
    //   * Value (Op 1) -> Cycles may happen (e.g. through load chains).
    //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                       but aren't necessarily from the same base node, so
    //                       cycles are possible (e.g. via an indexed store).
    //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
    //               non-indexed stores). Not constant on all targets (e.g. ARM)
    //               and so can participate in a cycle.
17336     for (unsigned j = 1; j < N->getNumOperands(); ++j)
17337       Worklist.push_back(N->getOperand(j).getNode());
17338   }
17339   // Search through DAG. We can stop early if we find a store node.
17340   for (unsigned i = 0; i < NumStores; ++i)
17341     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17342                                      Max)) {
      // If the search bails out, record the StoreNode and RootNode in the
      // StoreRootCountMap. Once we have seen the pair more times than the
      // limit, we won't add the StoreNode into the StoreNodes set again.
17346       if (Visited.size() >= Max) {
17347         auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17348         if (RootCount.first == RootNode)
17349           RootCount.second++;
17350         else
17351           RootCount = {RootNode, 1};
17352       }
17353       return false;
17354     }
17355   return true;
17356 }
17357 
17358 unsigned
17359 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17360                                   int64_t ElementSizeBytes) const {
17361   while (true) {
17362     // Find a store past the width of the first store.
17363     size_t StartIdx = 0;
17364     while ((StartIdx + 1 < StoreNodes.size()) &&
17365            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
17366               StoreNodes[StartIdx + 1].OffsetFromBase)
17367       ++StartIdx;
17368 
17369     // Bail if we don't have enough candidates to merge.
17370     if (StartIdx + 1 >= StoreNodes.size())
17371       return 0;
17372 
17373     // Trim stores that overlapped with the first store.
17374     if (StartIdx)
17375       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
17376 
17377     // Scan the memory operations on the chain and find the first
17378     // non-consecutive store memory address.
17379     unsigned NumConsecutiveStores = 1;
17380     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17381     // Check that the addresses are consecutive starting from the second
17382     // element in the list of stores.
17383     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
17384       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
17385       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17386         break;
17387       NumConsecutiveStores = i + 1;
17388     }
17389     if (NumConsecutiveStores > 1)
17390       return NumConsecutiveStores;
17391 
17392     // There are no consecutive stores at the start of the list.
17393     // Remove the first store and try again.
17394     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
17395   }
17396 }
17397 
17398 bool DAGCombiner::tryStoreMergeOfConstants(
17399     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17400     EVT MemVT, SDNode *RootNode, bool AllowVectors) {
17401   LLVMContext &Context = *DAG.getContext();
17402   const DataLayout &DL = DAG.getDataLayout();
17403   int64_t ElementSizeBytes = MemVT.getStoreSize();
17404   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17405   bool MadeChange = false;
17406 
17407   // Store the constants into memory as one consecutive store.
17408   while (NumConsecutiveStores >= 2) {
17409     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17410     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17411     unsigned FirstStoreAlign = FirstInChain->getAlignment();
17412     unsigned LastLegalType = 1;
17413     unsigned LastLegalVectorType = 1;
17414     bool LastIntegerTrunc = false;
17415     bool NonZero = false;
17416     unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
17417     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17418       StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
17419       SDValue StoredVal = ST->getValue();
17420       bool IsElementZero = false;
17421       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
17422         IsElementZero = C->isZero();
17423       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
17424         IsElementZero = C->getConstantFPValue()->isNullValue();
17425       if (IsElementZero) {
17426         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
17427           FirstZeroAfterNonZero = i;
17428       }
17429       NonZero |= !IsElementZero;
17430 
17431       // Find a legal type for the constant store.
17432       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17433       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17434       bool IsFast = false;
17435 
17436       // Break early when size is too large to be legal.
17437       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17438         break;
17439 
17440       if (TLI.isTypeLegal(StoreTy) &&
17441           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17442                                DAG.getMachineFunction()) &&
17443           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17444                                  *FirstInChain->getMemOperand(), &IsFast) &&
17445           IsFast) {
17446         LastIntegerTrunc = false;
17447         LastLegalType = i + 1;
17448         // Or check whether a truncstore is legal.
17449       } else if (TLI.getTypeAction(Context, StoreTy) ==
17450                  TargetLowering::TypePromoteInteger) {
17451         EVT LegalizedStoredValTy =
17452             TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
17453         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17454             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
17455                                  DAG.getMachineFunction()) &&
17456             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17457                                    *FirstInChain->getMemOperand(), &IsFast) &&
17458             IsFast) {
17459           LastIntegerTrunc = true;
17460           LastLegalType = i + 1;
17461         }
17462       }
17463 
      // We only use vectors if the constant is known to be zero or the
      // target says the vector store is cheap, and if the function is not
      // marked with the noimplicitfloat attribute.
17467       if ((!NonZero ||
17468            TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
17469           AllowVectors) {
17470         // Find a legal type for the vector store.
17471         unsigned Elts = (i + 1) * NumMemElts;
17472         EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17473         if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
17474             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17475             TLI.allowsMemoryAccess(Context, DL, Ty,
17476                                    *FirstInChain->getMemOperand(), &IsFast) &&
17477             IsFast)
17478           LastLegalVectorType = i + 1;
17479       }
17480     }
17481 
17482     bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
17483     unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
17484     bool UseTrunc = LastIntegerTrunc && !UseVector;
17485 
17486     // Check if we found a legal integer type that creates a meaningful
17487     // merge.
17488     if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning, one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have, is if the alignment has
      // improved or we've dropped a non-zero value. Drop as many
      // candidates as we can here.
17496       unsigned NumSkip = 1;
17497       while ((NumSkip < NumConsecutiveStores) &&
17498              (NumSkip < FirstZeroAfterNonZero) &&
17499              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17500         NumSkip++;
17501 
17502       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17503       NumConsecutiveStores -= NumSkip;
17504       continue;
17505     }
17506 
17507     // Check that we can merge these candidates without causing a cycle.
17508     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17509                                                   RootNode)) {
17510       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17511       NumConsecutiveStores -= NumElem;
17512       continue;
17513     }
17514 
17515     MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
17516                                                   /*IsConstantSrc*/ true,
17517                                                   UseVector, UseTrunc);
17518 
17519     // Remove merged stores for next iteration.
17520     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17521     NumConsecutiveStores -= NumElem;
17522   }
17523   return MadeChange;
17524 }
17525 
17526 bool DAGCombiner::tryStoreMergeOfExtracts(
17527     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17528     EVT MemVT, SDNode *RootNode) {
17529   LLVMContext &Context = *DAG.getContext();
17530   const DataLayout &DL = DAG.getDataLayout();
17531   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17532   bool MadeChange = false;
17533 
  // Loop over the consecutive stores, merging what we can on each pass.
17535   while (NumConsecutiveStores >= 2) {
17536     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17537     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17538     unsigned FirstStoreAlign = FirstInChain->getAlignment();
17539     unsigned NumStoresToMerge = 1;
17540     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17541       // Find a legal type for the vector store.
17542       unsigned Elts = (i + 1) * NumMemElts;
17543       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17544       bool IsFast = false;
17545 
17546       // Break early when size is too large to be legal.
17547       if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
17548         break;
17549 
17550       if (TLI.isTypeLegal(Ty) &&
17551           TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17552           TLI.allowsMemoryAccess(Context, DL, Ty,
17553                                  *FirstInChain->getMemOperand(), &IsFast) &&
17554           IsFast)
17555         NumStoresToMerge = i + 1;
17556     }
17557 
    // Check if we found a legal vector type that creates a meaningful
    // merge.
17560     if (NumStoresToMerge < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning, one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have, is if the alignment has
      // improved. Drop as many candidates as we can here.
17567       unsigned NumSkip = 1;
17568       while ((NumSkip < NumConsecutiveStores) &&
17569              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17570         NumSkip++;
17571 
17572       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17573       NumConsecutiveStores -= NumSkip;
17574       continue;
17575     }
17576 
17577     // Check that we can merge these candidates without causing a cycle.
17578     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
17579                                                   RootNode)) {
17580       StoreNodes.erase(StoreNodes.begin(),
17581                        StoreNodes.begin() + NumStoresToMerge);
17582       NumConsecutiveStores -= NumStoresToMerge;
17583       continue;
17584     }
17585 
17586     MadeChange |= mergeStoresOfConstantsOrVecElts(
17587         StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
17588         /*UseVector*/ true, /*UseTrunc*/ false);
17589 
17590     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17591     NumConsecutiveStores -= NumStoresToMerge;
17592   }
17593   return MadeChange;
17594 }
17595 
17596 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17597                                        unsigned NumConsecutiveStores, EVT MemVT,
17598                                        SDNode *RootNode, bool AllowVectors,
17599                                        bool IsNonTemporalStore,
17600                                        bool IsNonTemporalLoad) {
17601   LLVMContext &Context = *DAG.getContext();
17602   const DataLayout &DL = DAG.getDataLayout();
17603   int64_t ElementSizeBytes = MemVT.getStoreSize();
17604   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17605   bool MadeChange = false;
17606 
17607   // Look for load nodes which are used by the stored values.
17608   SmallVector<MemOpLink, 8> LoadNodes;
17609 
  // Find acceptable loads. The loads must share the same chain (token
  // factor), must not be zext/volatile/indexed, and must be consecutive.
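  // For example (illustrative), the two pairs
  //   (store (i32 (load p)), q) and (store (i32 (load p+4)), q+4)
  // can be merged into a single i64 load from p feeding a single i64 store
  // to q.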
17612   BaseIndexOffset LdBasePtr;
17613 
17614   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17615     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17616     SDValue Val = peekThroughBitcasts(St->getValue());
17617     LoadSDNode *Ld = cast<LoadSDNode>(Val);
17618 
17619     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
    // If this is not the first pointer we check, its base must match.
17621     int64_t LdOffset = 0;
17622     if (LdBasePtr.getBase().getNode()) {
17623       // The base ptr must be the same.
17624       if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
17625         break;
17626     } else {
      // Remember the first base pointer; all later pointers must match it.
17628       LdBasePtr = LdPtr;
17629     }
17630 
17631     // We found a potential memory operand to merge.
17632     LoadNodes.push_back(MemOpLink(Ld, LdOffset));
17633   }
17634 
17635   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
17636     Align RequiredAlignment;
17637     bool NeedRotate = false;
17638     if (LoadNodes.size() == 2) {
17639       // If we have load/store pair instructions and we only have two values,
17640       // don't bother merging.
17641       if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
17642           StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
17643         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
17644         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
17645         break;
17646       }
17647       // If the loads are reversed, see if we can rotate the halves into place.
17648       int64_t Offset0 = LoadNodes[0].OffsetFromBase;
17649       int64_t Offset1 = LoadNodes[1].OffsetFromBase;
17650       EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
17651       if (Offset0 - Offset1 == ElementSizeBytes &&
17652           (hasOperation(ISD::ROTL, PairVT) ||
17653            hasOperation(ISD::ROTR, PairVT))) {
17654         std::swap(LoadNodes[0], LoadNodes[1]);
17655         NeedRotate = true;
17656       }
17657     }
17658     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17659     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17660     Align FirstStoreAlign = FirstInChain->getAlign();
17661     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
17662 
    // Scan the memory operations on the chain and find the first
    // non-consecutive load memory address. These variables hold the index in
    // the load node array.
17666 
17667     unsigned LastConsecutiveLoad = 1;
17668 
    // These variables refer to a size, not an index in the array.
17670     unsigned LastLegalVectorType = 1;
17671     unsigned LastLegalIntegerType = 1;
17672     bool isDereferenceable = true;
17673     bool DoIntegerTruncate = false;
17674     int64_t StartAddress = LoadNodes[0].OffsetFromBase;
17675     SDValue LoadChain = FirstLoad->getChain();
17676     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
17677       // All loads must share the same chain.
17678       if (LoadNodes[i].MemNode->getChain() != LoadChain)
17679         break;
17680 
17681       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
17682       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17683         break;
17684       LastConsecutiveLoad = i;
17685 
17686       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
17687         isDereferenceable = false;
17688 
17689       // Find a legal type for the vector store.
17690       unsigned Elts = (i + 1) * NumMemElts;
17691       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17692 
17693       // Break early when size is too large to be legal.
17694       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17695         break;
17696 
17697       bool IsFastSt = false;
17698       bool IsFastLd = false;
17699       // Don't try vector types if we need a rotate. We may still fail the
17700       // legality checks for the integer type, but we can't handle the rotate
17701       // case with vectors.
17702       // FIXME: We could use a shuffle in place of the rotate.
17703       if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
17704           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17705                                DAG.getMachineFunction()) &&
17706           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17707                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
17708           IsFastSt &&
17709           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17710                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
17711           IsFastLd) {
17712         LastLegalVectorType = i + 1;
17713       }
17714 
17715       // Find a legal type for the integer store.
17716       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17717       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17718       if (TLI.isTypeLegal(StoreTy) &&
17719           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17720                                DAG.getMachineFunction()) &&
17721           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17722                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
17723           IsFastSt &&
17724           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17725                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
17726           IsFastLd) {
17727         LastLegalIntegerType = i + 1;
17728         DoIntegerTruncate = false;
17729         // Or check whether a truncstore and extload is legal.
17730       } else if (TLI.getTypeAction(Context, StoreTy) ==
17731                  TargetLowering::TypePromoteInteger) {
17732         EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
17733         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17734             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
17735                                  DAG.getMachineFunction()) &&
17736             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17737             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17738             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
17739             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17740                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
17741             IsFastSt &&
17742             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17743                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
17744             IsFastLd) {
17745           LastLegalIntegerType = i + 1;
17746           DoIntegerTruncate = true;
17747         }
17748       }
17749     }
17750 
17751     // Only use vector types if the vector type is larger than the integer
17752     // type. If they are the same, use integers.
17753     bool UseVectorTy =
17754         LastLegalVectorType > LastLegalIntegerType && AllowVectors;
17755     unsigned LastLegalType =
17756         std::max(LastLegalVectorType, LastLegalIntegerType);
17757 
    // We add +1 here because the LastXXX variables refer to a location
    // (index) while NumElem refers to a count of elements.
17760     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
17761     NumElem = std::min(LastLegalType, NumElem);
17762     Align FirstLoadAlign = FirstLoad->getAlign();
17763 
17764     if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning, one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have is if the alignment of either
      // the load or store has improved. Drop as many candidates as we
      // can here.
17772       unsigned NumSkip = 1;
17773       while ((NumSkip < LoadNodes.size()) &&
17774              (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
17775              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
17776         NumSkip++;
17777       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17778       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
17779       NumConsecutiveStores -= NumSkip;
17780       continue;
17781     }
17782 
17783     // Check that we can merge these candidates without causing a cycle.
17784     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17785                                                   RootNode)) {
17786       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17787       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17788       NumConsecutiveStores -= NumElem;
17789       continue;
17790     }
17791 
17792     // Find if it is better to use vectors or integers to load and store
17793     // to memory.
17794     EVT JointMemOpVT;
17795     if (UseVectorTy) {
17796       // Find a legal type for the vector store.
17797       unsigned Elts = NumElem * NumMemElts;
17798       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17799     } else {
17800       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
17801       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
17802     }
17803 
17804     SDLoc LoadDL(LoadNodes[0].MemNode);
17805     SDLoc StoreDL(StoreNodes[0].MemNode);
17806 
17807     // The merged loads are required to have the same incoming chain, so
17808     // using the first's chain is acceptable.
17809 
17810     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
17811     AddToWorklist(NewStoreChain.getNode());
17812 
17813     MachineMemOperand::Flags LdMMOFlags =
17814         isDereferenceable ? MachineMemOperand::MODereferenceable
17815                           : MachineMemOperand::MONone;
17816     if (IsNonTemporalLoad)
17817       LdMMOFlags |= MachineMemOperand::MONonTemporal;
17818 
17819     MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
17820                                               ? MachineMemOperand::MONonTemporal
17821                                               : MachineMemOperand::MONone;
17822 
17823     SDValue NewLoad, NewStore;
17824     if (UseVectorTy || !DoIntegerTruncate) {
17825       NewLoad = DAG.getLoad(
17826           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
17827           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
17828       SDValue StoreOp = NewLoad;
17829       if (NeedRotate) {
17830         unsigned LoadWidth = ElementSizeBytes * 8 * 2;
17831         assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
17832                "Unexpected type for rotate-able load pair");
17833         SDValue RotAmt =
17834             DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
17835         // Target can convert to the identical ROTR if it does not have ROTL.
17836         StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
17837       }
17838       NewStore = DAG.getStore(
17839           NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
17840           FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
17841     } else { // This must be the truncstore/extload case
17842       EVT ExtendedTy =
17843           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
17844       NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
17845                                FirstLoad->getChain(), FirstLoad->getBasePtr(),
17846                                FirstLoad->getPointerInfo(), JointMemOpVT,
17847                                FirstLoadAlign, LdMMOFlags);
17848       NewStore = DAG.getTruncStore(
17849           NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
17850           FirstInChain->getPointerInfo(), JointMemOpVT,
17851           FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17852     }
17853 
17854     // Transfer chain users from old loads to the new load.
17855     for (unsigned i = 0; i < NumElem; ++i) {
17856       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
17857       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
17858                                     SDValue(NewLoad.getNode(), 1));
17859     }
17860 
17861     // Replace all stores with the new store. Recursively remove corresponding
17862     // values if they are no longer used.
17863     for (unsigned i = 0; i < NumElem; ++i) {
17864       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
17865       CombineTo(StoreNodes[i].MemNode, NewStore);
17866       if (Val.getNode()->use_empty())
17867         recursivelyDeleteUnusedNodes(Val.getNode());
17868     }
17869 
17870     MadeChange = true;
17871     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17872     LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17873     NumConsecutiveStores -= NumElem;
17874   }
17875   return MadeChange;
17876 }
17877 
17878 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
17879   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
17880     return false;
17881 
17882   // TODO: Extend this function to merge stores of scalable vectors.
17883   // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
17884   // store since we know <vscale x 16 x i8> is exactly twice as large as
17885   // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
17886   EVT MemVT = St->getMemoryVT();
17887   if (MemVT.isScalableVector())
17888     return false;
17889   if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
17890     return false;
17891 
17892   // This function cannot currently deal with non-byte-sized memory sizes.
17893   int64_t ElementSizeBytes = MemVT.getStoreSize();
17894   if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
17895     return false;
17896 
17897   // Do not bother looking at stored values that are not constants, loads, or
17898   // extracted vector elements.
17899   SDValue StoredVal = peekThroughBitcasts(St->getValue());
17900   const StoreSource StoreSrc = getStoreSource(StoredVal);
17901   if (StoreSrc == StoreSource::Unknown)
17902     return false;
17903 
17904   SmallVector<MemOpLink, 8> StoreNodes;
17905   SDNode *RootNode;
  // Find potential store merge candidates by searching through the chain
  // sub-DAG.
17907   getStoreMergeCandidates(St, StoreNodes, RootNode);
17908 
17909   // Check if there is anything to merge.
17910   if (StoreNodes.size() < 2)
17911     return false;
17912 
17913   // Sort the memory operands according to their distance from the
17914   // base pointer.
17915   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
17916     return LHS.OffsetFromBase < RHS.OffsetFromBase;
17917   });
17918 
17919   bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
17920       Attribute::NoImplicitFloat);
17921   bool IsNonTemporalStore = St->isNonTemporal();
17922   bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
17923                            cast<LoadSDNode>(StoredVal)->isNonTemporal();
17924 
  // Store merging attempts to merge the lowest-addressed stores first. This
  // generally works out: if the first merge succeeds, the remaining stores
  // are examined afterwards. However, if a non-mergeable store is found
  // first, e.g., {p[-2], p[0], p[1], p[2], p[3]}, we would fail and miss
  // the subsequent mergeable cases. To prevent this, we prune such stores
  // from the front of StoreNodes here.
17932   bool MadeChange = false;
17933   while (StoreNodes.size() > 1) {
17934     unsigned NumConsecutiveStores =
17935         getConsecutiveStores(StoreNodes, ElementSizeBytes);
17936     // There are no more stores in the list to examine.
17937     if (NumConsecutiveStores == 0)
17938       return MadeChange;
17939 
17940     // We have at least 2 consecutive stores. Try to merge them.
17941     assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
17942     switch (StoreSrc) {
17943     case StoreSource::Constant:
17944       MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
17945                                              MemVT, RootNode, AllowVectors);
17946       break;
17947 
17948     case StoreSource::Extract:
17949       MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
17950                                             MemVT, RootNode);
17951       break;
17952 
17953     case StoreSource::Load:
17954       MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
17955                                          MemVT, RootNode, AllowVectors,
17956                                          IsNonTemporalStore, IsNonTemporalLoad);
17957       break;
17958 
17959     default:
17960       llvm_unreachable("Unhandled store source type");
17961     }
17962   }
17963   return MadeChange;
17964 }
17965 
17966 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
17967   SDLoc SL(ST);
17968   SDValue ReplStore;
17969 
17970   // Replace the chain to avoid dependency.
17971   if (ST->isTruncatingStore()) {
17972     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
17973                                   ST->getBasePtr(), ST->getMemoryVT(),
17974                                   ST->getMemOperand());
17975   } else {
17976     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
17977                              ST->getMemOperand());
17978   }
17979 
17980   // Create token to keep both nodes around.
17981   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
17982                               MVT::Other, ST->getChain(), ReplStore);
17983 
17984   // Make sure the new and old chains are cleaned up.
17985   AddToWorklist(Token.getNode());
17986 
17987   // Don't add users to work list.
17988   return CombineTo(ST, Token, false);
17989 }
17990 
17991 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
17992   SDValue Value = ST->getValue();
17993   if (Value.getOpcode() == ISD::TargetConstantFP)
17994     return SDValue();
17995 
17996   if (!ISD::isNormalStore(ST))
17997     return SDValue();
17998 
17999   SDLoc DL(ST);
18000 
18001   SDValue Chain = ST->getChain();
18002   SDValue Ptr = ST->getBasePtr();
18003 
18004   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
18005 
18006   // NOTE: If the original store is volatile, this transform must not increase
18007   // the number of stores.  For example, on x86-32 an f64 can be stored in one
18008   // processor operation but an i64 (which is not legal) requires two.  So the
18009   // transform should not be done in this case.
18010 
18011   SDValue Tmp;
18012   switch (CFP->getSimpleValueType(0).SimpleTy) {
18013   default:
18014     llvm_unreachable("Unknown FP type");
18015   case MVT::f16:    // We don't do this for these yet.
18016   case MVT::f80:
18017   case MVT::f128:
18018   case MVT::ppcf128:
18019     return SDValue();
18020   case MVT::f32:
18021     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
18022         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18023       ;
18024       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
18025                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
18026                             MVT::i32);
18027       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
18028     }
18029 
18030     return SDValue();
18031   case MVT::f64:
18032     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
18033          ST->isSimple()) ||
18034         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
18035       ;
18036       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
18037                             getZExtValue(), SDLoc(CFP), MVT::i64);
18038       return DAG.getStore(Chain, DL, Tmp,
18039                           Ptr, ST->getMemOperand());
18040     }
18041 
18042     if (ST->isSimple() &&
18043         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18044       // Many FP stores are not made apparent until after legalize, e.g. for
18045       // argument passing.  Since this is so common, custom legalize the
18046       // 64-bit integer store into two 32-bit stores.
18047       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
18048       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
18049       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
18050       if (DAG.getDataLayout().isBigEndian())
18051         std::swap(Lo, Hi);
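      // For example, for the f64 constant 1.0 (bits 0x3FF0000000000000) this
      // emits Lo = 0x00000000 and Hi = 0x3FF00000; on big-endian targets the
      // swap ensures the high word is stored at the lower address.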
18052 
18053       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18054       AAMDNodes AAInfo = ST->getAAInfo();
18055 
18056       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18057                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
18058       Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
18059       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
18060                                  ST->getPointerInfo().getWithOffset(4),
18061                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
18062       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
18063                          St0, St1);
18064     }
18065 
18066     return SDValue();
18067   }
18068 }
18069 
18070 SDValue DAGCombiner::visitSTORE(SDNode *N) {
18071   StoreSDNode *ST  = cast<StoreSDNode>(N);
18072   SDValue Chain = ST->getChain();
18073   SDValue Value = ST->getValue();
18074   SDValue Ptr   = ST->getBasePtr();
18075 
18076   // If this is a store of a bit convert, store the input value if the
18077   // resultant store does not need a higher alignment than the original.
18078   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
18079       ST->isUnindexed()) {
18080     EVT SVT = Value.getOperand(0).getValueType();
18081     // If the store is volatile, we only want to change the store type if the
18082     // resulting store is legal. Otherwise we might increase the number of
18083     // memory accesses. We don't care if the original type was legal or not
18084     // as we assume software couldn't rely on the number of accesses of an
18085     // illegal type.
18086     // TODO: May be able to relax for unordered atomics (see D66309)
18087     if (((!LegalOperations && ST->isSimple()) ||
18088          TLI.isOperationLegal(ISD::STORE, SVT)) &&
18089         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
18090                                      DAG, *ST->getMemOperand())) {
18091       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18092                           ST->getMemOperand());
18093     }
18094   }
18095 
18096   // Turn 'store undef, Ptr' -> nothing.
18097   if (Value.isUndef() && ST->isUnindexed())
18098     return Chain;
18099 
18100   // Try to infer better alignment information than the store already has.
18101   if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
18102     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18103       if (*Alignment > ST->getAlign() &&
18104           isAligned(*Alignment, ST->getSrcValueOffset())) {
18105         SDValue NewStore =
18106             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
18107                               ST->getMemoryVT(), *Alignment,
18108                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N, as we are only refining the alignment.
18110         assert(NewStore.getNode() == N);
18111         (void)NewStore;
18112       }
18113     }
18114   }
18115 
18116   // Try transforming a pair floating point load / store ops to integer
18117   // load / store ops.
18118   if (SDValue NewST = TransformFPLoadStorePair(N))
18119     return NewST;
18120 
18121   // Try transforming several stores into STORE (BSWAP).
18122   if (SDValue Store = mergeTruncStores(ST))
18123     return Store;
18124 
18125   if (ST->isUnindexed()) {
18126     // Walk up chain skipping non-aliasing memory nodes, on this store and any
18127     // adjacent stores.
18128     if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handles all of the worklist
      // manipulation. Return the original node so nothing else is done.
18131       return SDValue(ST, 0);
18132     }
18133     Chain = ST->getChain();
18134   }
18135 
18136   // FIXME: is there such a thing as a truncating indexed store?
18137   if (ST->isTruncatingStore() && ST->isUnindexed() &&
18138       Value.getValueType().isInteger() &&
18139       (!isa<ConstantSDNode>(Value) ||
18140        !cast<ConstantSDNode>(Value)->isOpaque())) {
18141     APInt TruncDemandedBits =
18142         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
18143                              ST->getMemoryVT().getScalarSizeInBits());
18144 
18145     // See if we can simplify the input to this truncstore with knowledge that
18146     // only the low bits are being used.  For example:
18147     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
18148     AddToWorklist(Value.getNode());
18149     if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
18150       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
18151                                ST->getMemOperand());
18152 
18153     // Otherwise, see if we can simplify the operation with
18154     // SimplifyDemandedBits, which only works if the value has a single use.
18155     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been
      // merged with another node (in which case N is deleted).
      // SimplifyDemandedBits will add Value's node back to the worklist if
      // necessary, but we also need to re-visit the Store node itself.
18160       if (N->getOpcode() != ISD::DELETED_NODE)
18161         AddToWorklist(N);
18162       return SDValue(N, 0);
18163     }
18164   }
18165 
18166   // If this is a load followed by a store to the same location, then the store
18167   // is dead/noop.
18168   // TODO: Can relax for unordered atomics (see D66309)
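  // For example, roughly "tmp = *p; *p = tmp;" makes the store a no-op when
  // nothing on the chain can write memory between the load and the store.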
18169   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
18170     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
18171         ST->isUnindexed() && ST->isSimple() &&
18172         Ld->getAddressSpace() == ST->getAddressSpace() &&
18173         // There can't be any side effects between the load and store, such as
18174         // a call or store.
18175         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
18176       // The store is dead, remove it.
18177       return Chain;
18178     }
18179   }
18180 
18181   // TODO: Can relax for unordered atomics (see D66309)
18182   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
18183     if (ST->isUnindexed() && ST->isSimple() &&
18184         ST1->isUnindexed() && ST1->isSimple()) {
18185       if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
18186           ST->getMemoryVT() == ST1->getMemoryVT() &&
18187           ST->getAddressSpace() == ST1->getAddressSpace()) {
18188         // If this is a store followed by a store with the same value to the
18189         // same location, then the store is dead/noop.
18190         return Chain;
18191       }
18192 
18193       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
18194           !ST1->getBasePtr().isUndef() &&
18195           // BaseIndexOffset and the code below requires knowing the size
18196           // of a vector, so bail out if MemoryVT is scalable.
18197           !ST->getMemoryVT().isScalableVector() &&
18198           !ST1->getMemoryVT().isScalableVector() &&
18199           ST->getAddressSpace() == ST1->getAddressSpace()) {
18200         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
18201         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
18202         unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
18203         unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
        // If the preceding store writes to a subset of the current store's
        // location and no other node is chained to that store, the preceding
        // store is dead and can effectively be dropped. Do not remove stores
        // to undef as they may be used as data sinks.
18208         if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
18209           CombineTo(ST1, ST1->getChain());
18210           return SDValue();
18211         }
18212       }
18213     }
18214   }
18215 
18216   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
18217   // truncating store.  We can do this even if this is already a truncstore.
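  // For example, (store (truncate i32:x to i16), ptr) can become
  // (truncstore i32:x, ptr, i16), assuming the target allows the combine.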
18218   if ((Value.getOpcode() == ISD::FP_ROUND ||
18219        Value.getOpcode() == ISD::TRUNCATE) &&
18220       Value.getNode()->hasOneUse() && ST->isUnindexed() &&
18221       TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
18222                                ST->getMemoryVT(), LegalOperations)) {
18223     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
18224                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
18225   }
18226 
18227   // Always perform this optimization before types are legal. If the target
18228   // prefers, also try this after legalization to catch stores that were created
18229   // by intrinsics or other nodes.
18230   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
18231     while (true) {
18232       // There can be multiple store sequences on the same chain.
18233       // Keep trying to merge store sequences until we are unable to do so
18234       // or until we merge the last store on the chain.
18235       bool Changed = mergeConsecutiveStores(ST);
18236       if (!Changed) break;
      // Return N, as merging only uses CombineTo and no worklist cleanup
      // is necessary.
18239       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
18240         return SDValue(N, 0);
18241     }
18242   }
18243 
18244   // Try transforming N to an indexed store.
18245   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18246     return SDValue(N, 0);
18247 
  // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
18249   //
18250   // Make sure to do this only after attempting to merge stores in order to
18251   //  avoid changing the types of some subset of stores due to visit order,
18252   //  preventing their merging.
18253   if (isa<ConstantFPSDNode>(ST->getValue())) {
18254     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
18255       return NewSt;
18256   }
18257 
18258   if (SDValue NewSt = splitMergedValStore(ST))
18259     return NewSt;
18260 
18261   return ReduceLoadOpStoreWidth(N);
18262 }
18263 
18264 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
18265   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
18266   if (!LifetimeEnd->hasOffset())
18267     return SDValue();
18268 
18269   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
18270                                         LifetimeEnd->getOffset(), false);
18271 
18272   // We walk up the chains to find stores.
18273   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
18274   while (!Chains.empty()) {
18275     SDValue Chain = Chains.pop_back_val();
18276     if (!Chain.hasOneUse())
18277       continue;
18278     switch (Chain.getOpcode()) {
18279     case ISD::TokenFactor:
18280       for (unsigned Nops = Chain.getNumOperands(); Nops;)
18281         Chains.push_back(Chain.getOperand(--Nops));
18282       break;
18283     case ISD::LIFETIME_START:
18284     case ISD::LIFETIME_END:
18285       // We can forward past any lifetime start/end that can be proven not to
18286       // alias the node.
18287       if (!mayAlias(Chain.getNode(), N))
18288         Chains.push_back(Chain.getOperand(0));
18289       break;
18290     case ISD::STORE: {
      StoreSDNode *ST = cast<StoreSDNode>(Chain);
18292       // TODO: Can relax for unordered atomics (see D66309)
18293       if (!ST->isSimple() || ST->isIndexed())
18294         continue;
18295       const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
18296       // The bounds of a scalable store are not known until runtime, so this
18297       // store cannot be elided.
18298       if (StoreSize.isScalable())
18299         continue;
18300       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
18301       // If we store purely within object bounds just before its lifetime ends,
18302       // we can remove the store.
18303       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
18304                                    StoreSize.getFixedSize() * 8)) {
18305         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
18306                    dbgs() << "\nwithin LIFETIME_END of : ";
18307                    LifetimeEndBase.dump(); dbgs() << "\n");
18308         CombineTo(ST, ST->getChain());
18309         return SDValue(N, 0);
18310       }
18311     }
18312     }
18313   }
18314   return SDValue();
18315 }
18316 
18317 /// For the instruction sequence of store below, F and I values
18318 /// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
18320 /// which can remove the bitwise instructions or sink them to colder places.
18321 ///
18322 ///   (store (or (zext (bitcast F to i32) to i64),
18323 ///              (shl (zext I to i64), 32)), addr)  -->
18324 ///   (store F, addr) and (store I, addr+4)
18325 ///
/// Similarly, splitting for other merged stores can also be beneficial, like:
18327 /// For pair of {i32, i32}, i64 store --> two i32 stores.
18328 /// For pair of {i32, i16}, i64 store --> two i32 stores.
18329 /// For pair of {i16, i16}, i32 store --> two i16 stores.
18330 /// For pair of {i16, i8},  i32 store --> two i16 stores.
18331 /// For pair of {i8, i8},   i16 store --> two i8 stores.
18332 ///
18333 /// We allow each target to determine specifically which kind of splitting is
18334 /// supported.
18335 ///
/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is SROA-transformed before being inlined into
/// hoo.
18338 ///   void goo(const std::pair<int, float> &);
18339 ///   hoo() {
18340 ///     ...
18341 ///     goo(std::make_pair(tmp, ftmp));
18342 ///     ...
18343 ///   }
18344 ///
18345 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
18346   if (OptLevel == CodeGenOpt::None)
18347     return SDValue();
18348 
18349   // Can't change the number of memory accesses for a volatile store or break
18350   // atomicity for an atomic one.
18351   if (!ST->isSimple())
18352     return SDValue();
18353 
18354   SDValue Val = ST->getValue();
18355   SDLoc DL(ST);
18356 
18357   // Match OR operand.
18358   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
18359     return SDValue();
18360 
18361   // Match SHL operand and get Lower and Higher parts of Val.
18362   SDValue Op1 = Val.getOperand(0);
18363   SDValue Op2 = Val.getOperand(1);
18364   SDValue Lo, Hi;
18365   if (Op1.getOpcode() != ISD::SHL) {
18366     std::swap(Op1, Op2);
18367     if (Op1.getOpcode() != ISD::SHL)
18368       return SDValue();
18369   }
18370   Lo = Op2;
18371   Hi = Op1.getOperand(0);
18372   if (!Op1.hasOneUse())
18373     return SDValue();
18374 
18375   // Match shift amount to HalfValBitSize.
18376   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
18377   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
18378   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
18379     return SDValue();
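  // For example, when Val is i64 we only match a shift amount of exactly 32,
  // i.e. (or (zext Lo), (shl (zext Hi), 32)).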
18380 
  // Lo and Hi must be zero-extended from scalar integer types no wider than
  // HalfValBitSize (e.g., from i32 or narrower when Val is i64).
18383   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
18384       !Lo.getOperand(0).getValueType().isScalarInteger() ||
18385       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
18386       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
18387       !Hi.getOperand(0).getValueType().isScalarInteger() ||
18388       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
18389     return SDValue();
18390 
18391   // Use the EVT of low and high parts before bitcast as the input
18392   // of target query.
18393   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
18394                   ? Lo.getOperand(0).getValueType()
18395                   : Lo.getValueType();
18396   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
18397                    ? Hi.getOperand(0).getValueType()
18398                    : Hi.getValueType();
18399   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
18400     return SDValue();
18401 
18402   // Start to split store.
18403   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18404   AAMDNodes AAInfo = ST->getAAInfo();
18405 
18406   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
18407   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
18408   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
18409   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
18410 
18411   SDValue Chain = ST->getChain();
18412   SDValue Ptr = ST->getBasePtr();
18413   // Lower value store.
18414   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18415                              ST->getOriginalAlign(), MMOFlags, AAInfo);
18416   Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
18417   // Higher value store.
18418   SDValue St1 = DAG.getStore(
18419       St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
18420       ST->getOriginalAlign(), MMOFlags, AAInfo);
18421   return St1;
18422 }
18423 
18424 /// Convert a disguised subvector insertion into a shuffle:
18425 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
  assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Expected insert_vector_elt");
18428   SDValue InsertVal = N->getOperand(1);
18429   SDValue Vec = N->getOperand(0);
18430 
18431   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
18432   // InsIndex)
18433   //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
18434   //   CONCAT_VECTORS.
18435   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
18436       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18437       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
18438     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
18439     ArrayRef<int> Mask = SVN->getMask();
18440 
18441     SDValue X = Vec.getOperand(0);
18442     SDValue Y = Vec.getOperand(1);
18443 
18444     // Vec's operand 0 is using indices from 0 to N-1 and
18445     // operand 1 from N to 2N - 1, where N is the number of
18446     // elements in the vectors.
18447     SDValue InsertVal0 = InsertVal.getOperand(0);
18448     int ElementOffset = -1;
18449 
18450     // We explore the inputs of the shuffle in order to see if we find the
18451     // source of the extract_vector_elt. If so, we can use it to modify the
18452     // shuffle rather than perform an insert_vector_elt.
18453     SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
18454     ArgWorkList.emplace_back(Mask.size(), Y);
18455     ArgWorkList.emplace_back(0, X);
18456 
18457     while (!ArgWorkList.empty()) {
18458       int ArgOffset;
18459       SDValue ArgVal;
18460       std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
18461 
18462       if (ArgVal == InsertVal0) {
18463         ElementOffset = ArgOffset;
18464         break;
18465       }
18466 
18467       // Peek through concat_vector.
18468       if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
18469         int CurrentArgOffset =
18470             ArgOffset + ArgVal.getValueType().getVectorNumElements();
18471         int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
18472         for (SDValue Op : reverse(ArgVal->ops())) {
18473           CurrentArgOffset -= Step;
18474           ArgWorkList.emplace_back(CurrentArgOffset, Op);
18475         }
18476 
18477         // Make sure we went through all the elements and did not screw up index
18478         // computation.
18479         assert(CurrentArgOffset == ArgOffset);
18480       }
18481     }
18482 
18483     if (ElementOffset != -1) {
18484       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
18485 
18486       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
18487       NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
18488       assert(NewMask[InsIndex] <
18489                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
18490              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
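      // For example, with v4 shuffle operands, inserting
      // (extract_vector_elt Y, 0) at index 2 sets NewMask[2] = 4, which
      // selects Y's element 0 directly in the shuffle.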
18491 
18492       SDValue LegalShuffle =
18493               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
18494                                           Y, NewMask, DAG);
18495       if (LegalShuffle)
18496         return LegalShuffle;
18497     }
18498   }
18499 
18500   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
18501   // bitcast(shuffle (bitcast V), (extended X), Mask)
18502   // Note: We do not use an insert_subvector node because that requires a
18503   // legal subvector type.
18504   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
18505       !InsertVal.getOperand(0).getValueType().isVector())
18506     return SDValue();
18507 
18508   SDValue SubVec = InsertVal.getOperand(0);
18509   SDValue DestVec = N->getOperand(0);
18510   EVT SubVecVT = SubVec.getValueType();
18511   EVT VT = DestVec.getValueType();
18512   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
  // If the source has only a single vector element, the cost of creating a
  // new vector from it is likely to exceed the cost of an insert_vector_elt.
18515   if (NumSrcElts == 1)
18516     return SDValue();
18517   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
18518   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
18519 
18520   // Step 1: Create a shuffle mask that implements this insert operation. The
18521   // vector that we are inserting into will be operand 0 of the shuffle, so
18522   // those elements are just 'i'. The inserted subvector is in the first
18523   // positions of operand 1 of the shuffle. Example:
18524   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
18525   SmallVector<int, 16> Mask(NumMaskVals);
18526   for (unsigned i = 0; i != NumMaskVals; ++i) {
18527     if (i / NumSrcElts == InsIndex)
18528       Mask[i] = (i % NumSrcElts) + NumMaskVals;
18529     else
18530       Mask[i] = i;
18531   }
18532 
18533   // Bail out if the target can not handle the shuffle we want to create.
18534   EVT SubVecEltVT = SubVecVT.getVectorElementType();
18535   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
18536   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
18537     return SDValue();
18538 
18539   // Step 2: Create a wide vector from the inserted source vector by appending
18540   // undefined elements. This is the same size as our destination vector.
18541   SDLoc DL(N);
18542   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
18543   ConcatOps[0] = SubVec;
18544   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
18545 
18546   // Step 3: Shuffle in the padded subvector.
18547   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
18548   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
18549   AddToWorklist(PaddedSubV.getNode());
18550   AddToWorklist(DestVecBC.getNode());
18551   AddToWorklist(Shuf.getNode());
18552   return DAG.getBitcast(VT, Shuf);
18553 }
18554 
18555 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
18556   SDValue InVec = N->getOperand(0);
18557   SDValue InVal = N->getOperand(1);
18558   SDValue EltNo = N->getOperand(2);
18559   SDLoc DL(N);
18560 
18561   EVT VT = InVec.getValueType();
18562   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
18563 
18564   // Insert into out-of-bounds element is undefined.
18565   if (IndexC && VT.isFixedLengthVector() &&
18566       IndexC->getZExtValue() >= VT.getVectorNumElements())
18567     return DAG.getUNDEF(VT);
18568 
18569   // Remove redundant insertions:
18570   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
18571   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18572       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
18573     return InVec;
18574 
18575   if (!IndexC) {
18576     // If this is variable insert to undef vector, it might be better to splat:
18577     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
18578     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
18579       if (VT.isScalableVector())
18580         return DAG.getSplatVector(VT, DL, InVal);
18581       else {
18582         SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
18583         return DAG.getBuildVector(VT, DL, Ops);
18584       }
18585     }
18586     return SDValue();
18587   }
18588 
18589   if (VT.isScalableVector())
18590     return SDValue();
18591 
18592   unsigned NumElts = VT.getVectorNumElements();
18593 
18594   // We must know which element is being inserted for folds below here.
18595   unsigned Elt = IndexC->getZExtValue();
18596   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
18597     return Shuf;
18598 
18599   // Canonicalize insert_vector_elt dag nodes.
18600   // Example:
18601   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
18602   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
18603   //
  // Do this only if the child insert_vector_elt node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
18606   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
18607       && isa<ConstantSDNode>(InVec.getOperand(2))) {
18608     unsigned OtherElt = InVec.getConstantOperandVal(2);
18609     if (Elt < OtherElt) {
18610       // Swap nodes.
18611       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
18612                                   InVec.getOperand(0), InVal, EltNo);
18613       AddToWorklist(NewOp.getNode());
18614       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
18615                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
18616     }
18617   }
18618 
18619   // If we can't generate a legal BUILD_VECTOR, exit
18620   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
18621     return SDValue();
18622 
18623   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
18624   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
18625   // vector elements.
18626   SmallVector<SDValue, 8> Ops;
18627   // Do not combine these two vectors if the output vector will not replace
18628   // the input vector.
18629   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
18630     Ops.append(InVec.getNode()->op_begin(),
18631                InVec.getNode()->op_end());
18632   } else if (InVec.isUndef()) {
18633     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
18634   } else {
18635     return SDValue();
18636   }
18637   assert(Ops.size() == NumElts && "Unexpected vector size");
18638 
18639   // Insert the element
18640   if (Elt < Ops.size()) {
18641     // All the operands of BUILD_VECTOR must have the same type;
18642     // we enforce that here.
18643     EVT OpVT = Ops[0].getValueType();
18644     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
18645   }
18646 
18647   // Return the new vector
18648   return DAG.getBuildVector(VT, DL, Ops);
18649 }
18650 
18651 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
18652                                                   SDValue EltNo,
18653                                                   LoadSDNode *OriginalLoad) {
18654   assert(OriginalLoad->isSimple());
18655 
18656   EVT ResultVT = EVE->getValueType(0);
18657   EVT VecEltVT = InVecVT.getVectorElementType();
18658 
18659   // If the vector element type is not a multiple of a byte then we are unable
18660   // to correctly compute an address to load only the extracted element as a
18661   // scalar.
18662   if (!VecEltVT.isByteSized())
18663     return SDValue();
18664 
18665   ISD::LoadExtType ExtTy =
18666       ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
18667   if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
18668       !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
18669     return SDValue();
18670 
18671   Align Alignment = OriginalLoad->getAlign();
18672   MachinePointerInfo MPI;
18673   SDLoc DL(EVE);
18674   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
18675     int Elt = ConstEltNo->getZExtValue();
18676     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
18677     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
18678     Alignment = commonAlignment(Alignment, PtrOff);
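    // For example, extracting element 2 of a v4f32 load gives PtrOff = 8
    // bytes, and the alignment is clamped to what that offset guarantees.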
18679   } else {
18680     // Discard the pointer info except the address space because the memory
18681     // operand can't represent this new access since the offset is variable.
18682     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
18683     Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
18684   }
18685 
18686   bool IsFast = false;
18687   if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
18688                               OriginalLoad->getAddressSpace(), Alignment,
18689                               OriginalLoad->getMemOperand()->getFlags(),
18690                               &IsFast) ||
18691       !IsFast)
18692     return SDValue();
18693 
18694   SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
18695                                                InVecVT, EltNo);
18696 
18697   // The replacement we need to do here is a little tricky: we need to
18698   // replace an extractelement of a load with a load.
18699   // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractelement is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
18703   SDValue Load;
18704   SDValue Chain;
18705   if (ResultVT.bitsGT(VecEltVT)) {
18706     // If the result type of vextract is wider than the load, then issue an
18707     // extending load instead.
18708     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
18709                                                   VecEltVT)
18710                                    ? ISD::ZEXTLOAD
18711                                    : ISD::EXTLOAD;
18712     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
18713                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
18714                           Alignment, OriginalLoad->getMemOperand()->getFlags(),
18715                           OriginalLoad->getAAInfo());
18716     Chain = Load.getValue(1);
18717   } else {
18718     Load = DAG.getLoad(
18719         VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
18720         OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
18721     Chain = Load.getValue(1);
18722     if (ResultVT.bitsLT(VecEltVT))
18723       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
18724     else
18725       Load = DAG.getBitcast(ResultVT, Load);
18726   }
18727   WorklistRemover DeadNodes(*this);
18728   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
18729   SDValue To[] = { Load, Chain };
18730   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
18731   // Make sure to revisit this node to clean it up; it will usually be dead.
18732   AddToWorklist(EVE);
18733   // Since we're explicitly calling ReplaceAllUses, add the new node to the
18734   // worklist explicitly as well.
18735   AddToWorklistWithUsers(Load.getNode());
18736   ++OpsNarrowed;
18737   return SDValue(EVE, 0);
18738 }
18739 
18740 /// Transform a vector binary operation into a scalar binary operation by moving
18741 /// the math/logic after an extract element of a vector.
18742 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
18743                                        bool LegalOperations) {
18744   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18745   SDValue Vec = ExtElt->getOperand(0);
18746   SDValue Index = ExtElt->getOperand(1);
18747   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18748   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
18749       Vec.getNode()->getNumValues() != 1)
18750     return SDValue();
18751 
18752   // Targets may want to avoid this to prevent an expensive register transfer.
18753   if (!TLI.shouldScalarizeBinop(Vec))
18754     return SDValue();
18755 
18756   // Extracting an element of a vector constant is constant-folded, so this
18757   // transform is just replacing a vector op with a scalar op while moving the
18758   // extract.
18759   SDValue Op0 = Vec.getOperand(0);
18760   SDValue Op1 = Vec.getOperand(1);
18761   if (isAnyConstantBuildVector(Op0, true) ||
18762       isAnyConstantBuildVector(Op1, true)) {
18763     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
18764     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
18765     SDLoc DL(ExtElt);
18766     EVT VT = ExtElt->getValueType(0);
18767     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
18768     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
18769     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
18770   }
18771 
18772   return SDValue();
18773 }
18774 
18775 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
18776   SDValue VecOp = N->getOperand(0);
18777   SDValue Index = N->getOperand(1);
18778   EVT ScalarVT = N->getValueType(0);
18779   EVT VecVT = VecOp.getValueType();
18780   if (VecOp.isUndef())
18781     return DAG.getUNDEF(ScalarVT);
18782 
18783   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
18784   //
18785   // This only really matters if the index is non-constant since other combines
18786   // on the constant elements already work.
18787   SDLoc DL(N);
18788   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
18789       Index == VecOp.getOperand(2)) {
18790     SDValue Elt = VecOp.getOperand(1);
18791     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
18792   }
18793 
  // (vextract (scalar_to_vector val), 0) -> val
18795   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18796     // Only 0'th element of SCALAR_TO_VECTOR is defined.
18797     if (DAG.isKnownNeverZero(Index))
18798       return DAG.getUNDEF(ScalarVT);
18799 
18800     // Check if the result type doesn't match the inserted element type. A
18801     // SCALAR_TO_VECTOR may truncate the inserted element and the
18802     // EXTRACT_VECTOR_ELT may widen the extracted vector.
18803     SDValue InOp = VecOp.getOperand(0);
18804     if (InOp.getValueType() != ScalarVT) {
18805       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18806       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18807     }
18808     return InOp;
18809   }
18810 
18811   // extract_vector_elt of out-of-bounds element -> UNDEF
18812   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18813   if (IndexC && VecVT.isFixedLengthVector() &&
18814       IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
18815     return DAG.getUNDEF(ScalarVT);
18816 
18817   // extract_vector_elt (build_vector x, y), 1 -> y
18818   if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
18819        VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
18820       TLI.isTypeLegal(VecVT) &&
18821       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
18822     assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
18823             VecVT.isFixedLengthVector()) &&
18824            "BUILD_VECTOR used for scalable vectors");
18825     unsigned IndexVal =
18826         VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
18827     SDValue Elt = VecOp.getOperand(IndexVal);
18828     EVT InEltVT = Elt.getValueType();
18829 
18830     // Sometimes build_vector's scalar input types do not match result type.
18831     if (ScalarVT == InEltVT)
18832       return Elt;
18833 
18834     // TODO: It may be useful to truncate if free if the build_vector implicitly
18835     // converts.
18836   }
18837 
18838   if (VecVT.isScalableVector())
18839     return SDValue();
18840 
18841   // All the code from this point onwards assumes fixed width vectors, but it's
18842   // possible that some of the combinations could be made to work for scalable
18843   // vectors too.
18844   unsigned NumElts = VecVT.getVectorNumElements();
18845   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
18846 
18847   // TODO: These transforms should not require the 'hasOneUse' restriction, but
18848   // there are regressions on multiple targets without it. We can end up with a
18849   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
18850   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
18851       VecOp.hasOneUse()) {
    // The vector index of the LSBs of the source depends on the endianness.
18853     bool IsLE = DAG.getDataLayout().isLittleEndian();
18854     unsigned ExtractIndex = IndexC->getZExtValue();
18855     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
18856     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
18857     SDValue BCSrc = VecOp.getOperand(0);
18858     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
18859       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
18860 
18861     if (LegalTypes && BCSrc.getValueType().isInteger() &&
18862         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18863       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
18864       // trunc i64 X to i32
18865       SDValue X = BCSrc.getOperand(0);
18866       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
18867              "Extract element and scalar to vector can't change element type "
18868              "from FP to integer.");
18869       unsigned XBitWidth = X.getValueSizeInBits();
18870       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
18871 
18872       // An extract element return value type can be wider than its vector
18873       // operand element type. In that case, the high bits are undefined, so
18874       // it's possible that we may need to extend rather than truncate.
18875       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
18876         assert(XBitWidth % VecEltBitWidth == 0 &&
18877                "Scalar bitwidth must be a multiple of vector element bitwidth");
18878         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
18879       }
18880     }
18881   }
18882 
18883   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
18884     return BO;
18885 
18886   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns, e.g. extracting elements from a wide vector on AVX without
  // using extract_subvector. However, if we can find an underlying scalar
  // value, then we can always use that.
18892   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
18893     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
18894     // Find the new index to extract from.
18895     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
18896 
18897     // Extracting an undef index is undef.
18898     if (OrigElt == -1)
18899       return DAG.getUNDEF(ScalarVT);
18900 
18901     // Select the right vector half to extract from.
18902     SDValue SVInVec;
18903     if (OrigElt < (int)NumElts) {
18904       SVInVec = VecOp.getOperand(0);
18905     } else {
18906       SVInVec = VecOp.getOperand(1);
18907       OrigElt -= NumElts;
18908     }
18909 
18910     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
18911       SDValue InOp = SVInVec.getOperand(OrigElt);
18912       if (InOp.getValueType() != ScalarVT) {
18913         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18914         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18915       }
18916 
18917       return InOp;
18918     }
18919 
18920     // FIXME: We should handle recursing on other vector shuffles and
18921     // scalar_to_vector here as well.
18922 
18923     if (!LegalOperations ||
18924         // FIXME: Should really be just isOperationLegalOrCustom.
18925         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
18926         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
18927       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
18928                          DAG.getVectorIdxConstant(OrigElt, DL));
18929     }
18930   }
18931 
18932   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
18933   // simplify it based on the (valid) extraction indices.
18934   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
18935         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18936                Use->getOperand(0) == VecOp &&
18937                isa<ConstantSDNode>(Use->getOperand(1));
18938       })) {
18939     APInt DemandedElts = APInt::getZero(NumElts);
18940     for (SDNode *Use : VecOp->uses()) {
18941       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
18942       if (CstElt->getAPIntValue().ult(NumElts))
18943         DemandedElts.setBit(CstElt->getZExtValue());
18944     }
18945     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
18946       // We simplified the vector operand of this extract element. If this
18947       // extract is not dead, visit it again so it is folded properly.
18948       if (N->getOpcode() != ISD::DELETED_NODE)
18949         AddToWorklist(N);
18950       return SDValue(N, 0);
18951     }
18952     APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
18953     if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
18954       // We simplified the vector operand of this extract element. If this
18955       // extract is not dead, visit it again so it is folded properly.
18956       if (N->getOpcode() != ISD::DELETED_NODE)
18957         AddToWorklist(N);
18958       return SDValue(N, 0);
18959     }
18960   }
18961 
  // Everything under here is trying to match an extract of a loaded value.
  // If the result of the load has to be truncated, then it's not necessarily
  // profitable.
18965   bool BCNumEltsChanged = false;
18966   EVT ExtVT = VecVT.getVectorElementType();
18967   EVT LVT = ExtVT;
18968   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
18969     return SDValue();
18970 
18971   if (VecOp.getOpcode() == ISD::BITCAST) {
18972     // Don't duplicate a load with other uses.
18973     if (!VecOp.hasOneUse())
18974       return SDValue();
18975 
18976     EVT BCVT = VecOp.getOperand(0).getValueType();
18977     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
18978       return SDValue();
18979     if (NumElts != BCVT.getVectorNumElements())
18980       BCNumEltsChanged = true;
18981     VecOp = VecOp.getOperand(0);
18982     ExtVT = BCVT.getVectorElementType();
18983   }
18984 
18985   // extract (vector load $addr), i --> load $addr + i * size
18986   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
18987       ISD::isNormalLoad(VecOp.getNode()) &&
18988       !Index->hasPredecessor(VecOp.getNode())) {
18989     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
18990     if (VecLoad && VecLoad->isSimple())
18991       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
18992   }
18993 
18994   // Perform only after legalization to ensure build_vector / vector_shuffle
18995   // optimizations have already been done.
18996   if (!LegalOperations || !IndexC)
18997     return SDValue();
18998 
18999   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
19000   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
19001   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
19002   int Elt = IndexC->getZExtValue();
19003   LoadSDNode *LN0 = nullptr;
19004   if (ISD::isNormalLoad(VecOp.getNode())) {
19005     LN0 = cast<LoadSDNode>(VecOp);
19006   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
19007              VecOp.getOperand(0).getValueType() == ExtVT &&
19008              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
19009     // Don't duplicate a load with other uses.
19010     if (!VecOp.hasOneUse())
19011       return SDValue();
19012 
19013     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
19014   }
19015   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
19016     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
19017     // =>
19018     // (load $addr+1*size)
19019 
19020     // Don't duplicate a load with other uses.
19021     if (!VecOp.hasOneUse())
19022       return SDValue();
19023 
19024     // If the bit convert changed the number of elements, it is unsafe
19025     // to examine the mask.
19026     if (BCNumEltsChanged)
19027       return SDValue();
19028 
    // Select the input vector, guarding against an out-of-range extract index.
19030     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
19031     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
19032 
19033     if (VecOp.getOpcode() == ISD::BITCAST) {
19034       // Don't duplicate a load with other uses.
19035       if (!VecOp.hasOneUse())
19036         return SDValue();
19037 
19038       VecOp = VecOp.getOperand(0);
19039     }
19040     if (ISD::isNormalLoad(VecOp.getNode())) {
19041       LN0 = cast<LoadSDNode>(VecOp);
19042       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
19043       Index = DAG.getConstant(Elt, DL, Index.getValueType());
19044     }
19045   } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
19046              VecVT.getVectorElementType() == ScalarVT &&
19047              (!LegalTypes ||
19048               TLI.isTypeLegal(
19049                   VecOp.getOperand(0).getValueType().getVectorElementType()))) {
19050     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
19051     //      -> extract_vector_elt a, 0
19052     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
19053     //      -> extract_vector_elt a, 1
19054     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
19055     //      -> extract_vector_elt b, 0
19056     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
19057     //      -> extract_vector_elt b, 1
19058     SDLoc SL(N);
19059     EVT ConcatVT = VecOp.getOperand(0).getValueType();
19060     unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19061     SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
19062                                      Index.getValueType());
19063 
19064     SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
19065     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
19066                               ConcatVT.getVectorElementType(),
19067                               ConcatOp, NewIdx);
19068     return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
19069   }
19070 
19071   // Make sure we found a non-volatile load and the extractelement is
19072   // the only use.
19073   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
19074     return SDValue();
19075 
19076   // If Idx was -1 above, Elt is going to be -1, so just return undef.
19077   if (Elt == -1)
19078     return DAG.getUNDEF(LVT);
19079 
19080   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
19081 }
19082 
19083 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
19084 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
19085   // We perform this optimization post type-legalization because
19086   // the type-legalizer often scalarizes integer-promoted vectors.
19087   // Performing this optimization before may create bit-casts which
19088   // will be type-legalized to complex code sequences.
19089   // We perform this optimization only before the operation legalizer because we
19090   // may introduce illegal operations.
19091   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
19092     return SDValue();
19093 
19094   unsigned NumInScalars = N->getNumOperands();
19095   SDLoc DL(N);
19096   EVT VT = N->getValueType(0);
19097 
19098   // Check to see if this is a BUILD_VECTOR of a bunch of values
19099   // which come from any_extend or zero_extend nodes. If so, we can create
19100   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
19101   // optimizations. We do not handle sign-extend because we can't fill the sign
19102   // using shuffles.
19103   EVT SourceType = MVT::Other;
19104   bool AllAnyExt = true;
19105 
19106   for (unsigned i = 0; i != NumInScalars; ++i) {
19107     SDValue In = N->getOperand(i);
19108     // Ignore undef inputs.
19109     if (In.isUndef()) continue;
19110 
19111     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
19112     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
19113 
19114     // Abort if the element is not an extension.
19115     if (!ZeroExt && !AnyExt) {
19116       SourceType = MVT::Other;
19117       break;
19118     }
19119 
19120     // The input is a ZeroExt or AnyExt. Check the original type.
19121     EVT InTy = In.getOperand(0).getValueType();
19122 
19123     // Check that all of the widened source types are the same.
19124     if (SourceType == MVT::Other)
19125       // First time.
19126       SourceType = InTy;
19127     else if (InTy != SourceType) {
      // Multiple source types. Abort.
19129       SourceType = MVT::Other;
19130       break;
19131     }
19132 
19133     // Check if all of the extends are ANY_EXTENDs.
19134     AllAnyExt &= AnyExt;
19135   }
19136 
19137   // In order to have valid types, all of the inputs must be extended from the
19138   // same source type and all of the inputs must be any or zero extend.
19139   // Scalar sizes must be a power of two.
19140   EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                    isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                    isPowerOf2_32(SourceType.getSizeInBits());
19144 
19145   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
19146   // turn into a single shuffle instruction.
19147   if (!ValidTypes)
19148     return SDValue();
19149 
19150   // If we already have a splat buildvector, then don't fold it if it means
19151   // introducing zeros.
19152   if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
19153     return SDValue();
19154 
  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits() / SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType)
                             : DAG.getConstant(0, DL, SourceType);
19160 
19161   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
19162   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
19163 
19164   // Populate the new build_vector
19165   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19166     SDValue Cast = N->getOperand(i);
19167     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
19168             Cast.getOpcode() == ISD::ZERO_EXTEND ||
19169             Cast.isUndef()) && "Invalid cast opcode");
19170     SDValue In;
19171     if (Cast.isUndef())
19172       In = DAG.getUNDEF(SourceType);
19173     else
19174       In = Cast->getOperand(0);
19175     unsigned Index = isLE ? (i * ElemRatio) :
19176                             (i * ElemRatio + (ElemRatio - 1));
19177 
19178     assert(Index < Ops.size() && "Invalid index");
19179     Ops[Index] = In;
19180   }
19181 
19182   // The type of the new BUILD_VECTOR node.
19183   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
19184   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
19185          "Invalid vector size");
19186   // Check if the new vector type is legal.
19187   if (!isTypeLegal(VecVT) ||
19188       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
19189        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
19190     return SDValue();
19191 
19192   // Make the new BUILD_VECTOR.
19193   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
19194 
19195   // The new BUILD_VECTOR node has the potential to be further optimized.
19196   AddToWorklist(BV.getNode());
19197   // Bitcast to the desired type.
19198   return DAG.getBitcast(VT, BV);
19199 }
19200 
19201 // Simplify (build_vec (trunc $1)
19202 //                     (trunc (srl $1 half-width))
19203 //                     (trunc (srl $1 (2 * half-width))) …)
19204 // to (bitcast $1)
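// For example (illustrative, little-endian):
//   (v4i16 (build_vector (i16 (trunc i64:x)),
//                        (i16 (trunc (srl i64:x, 16))),
//                        (i16 (trunc (srl i64:x, 32))),
//                        (i16 (trunc (srl i64:x, 48)))))
//     -> (v4i16 (bitcast i64:x))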
19205 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
19206   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19207 
  // Only for little-endian targets.
19209   if (!DAG.getDataLayout().isLittleEndian())
19210     return SDValue();
19211 
19212   SDLoc DL(N);
19213   EVT VT = N->getValueType(0);
19214   EVT OutScalarTy = VT.getScalarType();
19215   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
19216 
  // Only handle power-of-two types, to be sure that the bitcast works well.
19218   if (!isPowerOf2_64(ScalarTypeBitsize))
19219     return SDValue();
19220 
19221   unsigned NumInScalars = N->getNumOperands();
19222 
19223   // Look through bitcasts
19224   auto PeekThroughBitcast = [](SDValue Op) {
19225     if (Op.getOpcode() == ISD::BITCAST)
19226       return Op.getOperand(0);
19227     return Op;
19228   };
19229 
  // The source value from which all the parts are extracted.
19231   SDValue Src;
19232   for (unsigned i = 0; i != NumInScalars; ++i) {
19233     SDValue In = PeekThroughBitcast(N->getOperand(i));
19234     // Ignore undef inputs.
19235     if (In.isUndef()) continue;
19236 
19237     if (In.getOpcode() != ISD::TRUNCATE)
19238       return SDValue();
19239 
19240     In = PeekThroughBitcast(In.getOperand(0));
19241 
19242     if (In.getOpcode() != ISD::SRL) {
      // For now only handle a build_vec without shuffling; the part without
      // a shift must be element 0. Handle shifts here in the future.
19245       if (i != 0)
19246         return SDValue();
19247 
19248       Src = In;
19249     } else {
      // In is an SRL.
      SDValue Part = PeekThroughBitcast(In.getOperand(0));

      if (!Src) {
        Src = Part;
      } else if (Src != Part) {
        // Vector parts do not stem from the same variable.
        return SDValue();
      }
19259 
19260       SDValue ShiftAmtVal = In.getOperand(1);
19261       if (!isa<ConstantSDNode>(ShiftAmtVal))
19262         return SDValue();
19263 
      uint64_t ShiftAmt = In.getConstantOperandVal(1);

      // The extracted value is not extracted at the right position.
      if (ShiftAmt != i * ScalarTypeBitsize)
19268         return SDValue();
19269     }
19270   }
19271 
19272   // Only cast if the size is the same
19273   if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
19274     return SDValue();
19275 
19276   return DAG.getBitcast(VT, Src);
19277 }
19278 
19279 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
19280                                            ArrayRef<int> VectorMask,
19281                                            SDValue VecIn1, SDValue VecIn2,
19282                                            unsigned LeftIdx, bool DidSplitVec) {
19283   SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19284 
19285   EVT VT = N->getValueType(0);
19286   EVT InVT1 = VecIn1.getValueType();
19287   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
19288 
19289   unsigned NumElems = VT.getVectorNumElements();
19290   unsigned ShuffleNumElems = NumElems;
19291 
19292   // If we artificially split a vector in two already, then the offsets in the
19293   // operands will all be based off of VecIn1, even those in VecIn2.
19294   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
19295 
19296   uint64_t VTSize = VT.getFixedSizeInBits();
19297   uint64_t InVT1Size = InVT1.getFixedSizeInBits();
19298   uint64_t InVT2Size = InVT2.getFixedSizeInBits();
19299 
19300   assert(InVT2Size <= InVT1Size &&
19301          "Inputs must be sorted to be in non-increasing vector size order.");
19302 
19303   // We can't generate a shuffle node with mismatched input and output types.
19304   // Try to make the types match the type of the output.
19305   if (InVT1 != VT || InVT2 != VT) {
19306     if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
19307       // If the output vector length is a multiple of both input lengths,
19308       // we can concatenate them and pad the rest with undefs.
19309       unsigned NumConcats = VTSize / InVT1Size;
19310       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
19311       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
19312       ConcatOps[0] = VecIn1;
19313       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
19314       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19315       VecIn2 = SDValue();
19316     } else if (InVT1Size == VTSize * 2) {
19317       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
19318         return SDValue();
19319 
19320       if (!VecIn2.getNode()) {
19321         // If we only have one input vector, and it's twice the size of the
19322         // output, split it in two.
19323         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
19324                              DAG.getVectorIdxConstant(NumElems, DL));
19325         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
19326         // Since we now have shorter input vectors, adjust the offset of the
19327         // second vector's start.
19328         Vec2Offset = NumElems;
19329       } else {
19330         assert(InVT2Size <= InVT1Size &&
19331                "Second input is not going to be larger than the first one.");
19332 
19333         // VecIn1 is wider than the output, and we have another, possibly
19334         // smaller input. Pad the smaller input with undefs, shuffle at the
19335         // input vector width, and extract the output.
19336         // The shuffle type is different than VT, so check legality again.
19337         if (LegalOperations &&
19338             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
19339           return SDValue();
19340 
19341         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
19342         // lower it back into a BUILD_VECTOR. So if the inserted type is
19343         // illegal, don't even try.
19344         if (InVT1 != InVT2) {
19345           if (!TLI.isTypeLegal(InVT2))
19346             return SDValue();
19347           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
19348                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19349         }
19350         ShuffleNumElems = NumElems * 2;
19351       }
19352     } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19353       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
19354       ConcatOps[0] = VecIn2;
19355       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19356     } else {
19357       // TODO: Support cases where the length mismatch isn't exactly by a
19358       // factor of 2.
19359       // TODO: Move this check upwards, so that if we have bad type
19360       // mismatches, we don't create any DAG nodes.
19361       return SDValue();
19362     }
19363   }
19364 
19365   // Initialize mask to undef.
19366   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
19367 
19368   // Only need to run up to the number of elements actually used, not the
19369   // total number of elements in the shuffle - if we are shuffling a wider
19370   // vector, the high lanes should be set to undef.
19371   for (unsigned i = 0; i != NumElems; ++i) {
19372     if (VectorMask[i] <= 0)
19373       continue;
19374 
19375     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
19376     if (VectorMask[i] == (int)LeftIdx) {
19377       Mask[i] = ExtIndex;
19378     } else if (VectorMask[i] == (int)LeftIdx + 1) {
19379       Mask[i] = Vec2Offset + ExtIndex;
19380     }
19381   }
19382 
  // The type of the input vectors may have changed above.
19384   InVT1 = VecIn1.getValueType();
19385 
19386   // If we already have a VecIn2, it should have the same type as VecIn1.
19387   // If we don't, get an undef/zero vector of the appropriate type.
19388   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
19389   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
19390 
19391   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
19392   if (ShuffleNumElems > NumElems)
19393     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
19394 
19395   return Shuffle;
19396 }
19397 
19398 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
19399   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19400 
19401   // First, determine where the build vector is not undef.
19402   // TODO: We could extend this to handle zero elements as well as undefs.
19403   int NumBVOps = BV->getNumOperands();
19404   int ZextElt = -1;
19405   for (int i = 0; i != NumBVOps; ++i) {
19406     SDValue Op = BV->getOperand(i);
19407     if (Op.isUndef())
19408       continue;
19409     if (ZextElt == -1)
19410       ZextElt = i;
19411     else
19412       return SDValue();
19413   }
19414   // Bail out if there's no non-undef element.
19415   if (ZextElt == -1)
19416     return SDValue();
19417 
19418   // The build vector contains some number of undef elements and exactly
19419   // one other element. That other element must be a zero-extended scalar
19420   // extracted from a vector at a constant index to turn this into a shuffle.
19421   // Also, require that the build vector does not implicitly truncate/extend
19422   // its elements.
19423   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
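  //
  // An illustrative example:
  //   (v4i32 (build_vector undef,
  //                        (i32 (zero_extend
  //                               (i16 (extract_vector_elt v8i16:V, 2)))),
  //                        undef, undef))
  //     -> (v4i32 (bitcast (vector_shuffle<u,u,2,8,u,u,u,u> V, zero_vector)))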
19424   EVT VT = BV->getValueType(0);
19425   SDValue Zext = BV->getOperand(ZextElt);
19426   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
19427       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19428       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
19429       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
19430     return SDValue();
19431 
19432   // The zero-extend must be a multiple of the source size, and we must be
19433   // building a vector of the same size as the source of the extract element.
19434   SDValue Extract = Zext.getOperand(0);
19435   unsigned DestSize = Zext.getValueSizeInBits();
19436   unsigned SrcSize = Extract.getValueSizeInBits();
19437   if (DestSize % SrcSize != 0 ||
19438       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
19439     return SDValue();
19440 
19441   // Create a shuffle mask that will combine the extracted element with zeros
19442   // and undefs.
19443   int ZextRatio = DestSize / SrcSize;
19444   int NumMaskElts = NumBVOps * ZextRatio;
19445   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
19446   for (int i = 0; i != NumMaskElts; ++i) {
19447     if (i / ZextRatio == ZextElt) {
19448       // The low bits of the (potentially translated) extracted element map to
19449       // the source vector. The high bits map to zero. We will use a zero vector
19450       // as the 2nd source operand of the shuffle, so use the 1st element of
19451       // that vector (mask value is number-of-elements) for the high bits.
19452       if (i % ZextRatio == 0)
19453         ShufMask[i] = Extract.getConstantOperandVal(1);
19454       else
19455         ShufMask[i] = NumMaskElts;
19456     }
19457 
19458     // Undef elements of the build vector remain undef because we initialize
19459     // the shuffle mask with -1.
19460   }
19461 
19462   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
19463   // bitcast (shuffle V, ZeroVec, VectorMask)
19464   SDLoc DL(BV);
19465   EVT VecVT = Extract.getOperand(0).getValueType();
19466   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
19467   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19468   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
19469                                              ZeroVec, ShufMask, DAG);
19470   if (!Shuf)
19471     return SDValue();
19472   return DAG.getBitcast(VT, Shuf);
19473 }
19474 
19475 // FIXME: promote to STLExtras.
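// Returns the index of the first occurrence of Val in Range, or -1 if Val is
// not present.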
19476 template <typename R, typename T>
19477 static auto getFirstIndexOf(R &&Range, const T &Val) {
19478   auto I = find(Range, Val);
19479   if (I == Range.end())
19480     return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
19481   return std::distance(Range.begin(), I);
19482 }
19483 
19484 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
19485 // operations. If the types of the vectors we're extracting from allow it,
19486 // turn this into a vector_shuffle node.
19487 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
19488   SDLoc DL(N);
19489   EVT VT = N->getValueType(0);
19490 
19491   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
19492   if (!isTypeLegal(VT))
19493     return SDValue();
19494 
19495   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
19496     return V;
19497 
19498   // May only combine to shuffle after legalize if shuffle is legal.
19499   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
19500     return SDValue();
19501 
19502   bool UsesZeroVector = false;
19503   unsigned NumElems = N->getNumOperands();
19504 
19505   // Record, for each element of the newly built vector, which input vector
19506   // that element comes from. -1 stands for undef, 0 for the zero vector,
19507   // and positive values for the input vectors.
19508   // VectorMask maps each element to its vector number, and VecIn maps vector
19509   // numbers to their initial SDValues.
19510 
19511   SmallVector<int, 8> VectorMask(NumElems, -1);
19512   SmallVector<SDValue, 8> VecIn;
19513   VecIn.push_back(SDValue());
19514 
19515   for (unsigned i = 0; i != NumElems; ++i) {
19516     SDValue Op = N->getOperand(i);
19517 
19518     if (Op.isUndef())
19519       continue;
19520 
19521     // See if we can use a blend with a zero vector.
19522     // TODO: Should we generalize this to a blend with an arbitrary constant
19523     // vector?
19524     if (isNullConstant(Op) || isNullFPConstant(Op)) {
19525       UsesZeroVector = true;
19526       VectorMask[i] = 0;
19527       continue;
19528     }
19529 
19530     // Not an undef or zero. If the input is something other than an
19531     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
19532     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19533         !isa<ConstantSDNode>(Op.getOperand(1)))
19534       return SDValue();
19535     SDValue ExtractedFromVec = Op.getOperand(0);
19536 
19537     if (ExtractedFromVec.getValueType().isScalableVector())
19538       return SDValue();
19539 
19540     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
19541     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
19542       return SDValue();
19543 
19544     // All inputs must have the same element type as the output.
19545     if (VT.getVectorElementType() !=
19546         ExtractedFromVec.getValueType().getVectorElementType())
19547       return SDValue();
19548 
19549     // Have we seen this input vector before?
19550     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
19551     // a map back from SDValues to numbers isn't worth it.
19552     int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
19553     if (Idx == -1) { // A new source vector?
19554       Idx = VecIn.size();
19555       VecIn.push_back(ExtractedFromVec);
19556     }
19557 
19558     VectorMask[i] = Idx;
19559   }
19560 
19561   // If we didn't find at least one input vector, bail out.
19562   if (VecIn.size() < 2)
19563     return SDValue();
19564 
  // If all the operands of the BUILD_VECTOR extract from the same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
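  //
  // For example (illustrative): a two-element BUILD_VECTOR of elements 0 and
  // 5 of a v8i32 source is rebuilt from the two v4i32 halves of that source,
  // so the shuffle phase below can operate on narrower vectors.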
19569   bool DidSplitVec = false;
19570   if (VecIn.size() == 2) {
19571     unsigned MaxIndex = 0;
19572     unsigned NearestPow2 = 0;
19573     SDValue Vec = VecIn.back();
19574     EVT InVT = Vec.getValueType();
19575     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
19576 
19577     for (unsigned i = 0; i < NumElems; i++) {
19578       if (VectorMask[i] <= 0)
19579         continue;
19580       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
19581       IndexVec[i] = Index;
19582       MaxIndex = std::max(MaxIndex, Index);
19583     }
19584 
19585     NearestPow2 = PowerOf2Ceil(MaxIndex);
19586     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
19587         NumElems * 2 < NearestPow2) {
19588       unsigned SplitSize = NearestPow2 / 2;
19589       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
19590                                      InVT.getVectorElementType(), SplitSize);
19591       if (TLI.isTypeLegal(SplitVT) &&
19592           SplitSize + SplitVT.getVectorNumElements() <=
19593               InVT.getVectorNumElements()) {
19594         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19595                                      DAG.getVectorIdxConstant(SplitSize, DL));
19596         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19597                                      DAG.getVectorIdxConstant(0, DL));
19598         VecIn.pop_back();
19599         VecIn.push_back(VecIn1);
19600         VecIn.push_back(VecIn2);
19601         DidSplitVec = true;
19602 
19603         for (unsigned i = 0; i < NumElems; i++) {
19604           if (VectorMask[i] <= 0)
19605             continue;
19606           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
19607         }
19608       }
19609     }
19610   }
19611 
19612   // Sort input vectors by decreasing vector element count,
19613   // while preserving the relative order of equally-sized vectors.
  // Note that we keep the first "implicit zero vector" entry as-is.
19615   SmallVector<SDValue, 8> SortedVecIn(VecIn);
19616   llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
19617                     [](const SDValue &a, const SDValue &b) {
19618                       return a.getValueType().getVectorNumElements() >
19619                              b.getValueType().getVectorNumElements();
19620                     });
19621 
19622   // We now also need to rebuild the VectorMask, because it referenced element
19623   // order in VecIn, and we just sorted them.
19624   for (int &SourceVectorIndex : VectorMask) {
19625     if (SourceVectorIndex <= 0)
19626       continue;
19627     unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
19628     assert(Idx > 0 && Idx < SortedVecIn.size() &&
19629            VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
19630     SourceVectorIndex = Idx;
19631   }
19632 
19633   VecIn = std::move(SortedVecIn);
19634 
  // TODO: Should this fire if some of the input vectors have an illegal type
  // (like it does now), or should we let legalization run its course first?
19637 
19638   // Shuffle phase:
19639   // Take pairs of vectors, and shuffle them so that the result has elements
19640   // from these vectors in the correct places.
19641   // For example, given:
19642   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
19643   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
19644   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
19645   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
19646   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
19647   // We will generate:
19648   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
19649   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
19650   SmallVector<SDValue, 4> Shuffles;
19651   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
19652     unsigned LeftIdx = 2 * In + 1;
19653     SDValue VecLeft = VecIn[LeftIdx];
19654     SDValue VecRight =
19655         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
19656 
19657     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
19658                                                 VecRight, LeftIdx, DidSplitVec))
19659       Shuffles.push_back(Shuffle);
19660     else
19661       return SDValue();
19662   }
19663 
19664   // If we need the zero vector as an "ingredient" in the blend tree, add it
19665   // to the list of shuffles.
19666   if (UsesZeroVector)
19667     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
19668                                       : DAG.getConstantFP(0.0, DL, VT));
19669 
19670   // If we only have one shuffle, we're done.
19671   if (Shuffles.size() == 1)
19672     return Shuffles[0];
19673 
19674   // Update the vector mask to point to the post-shuffle vectors.
19675   for (int &Vec : VectorMask)
19676     if (Vec == 0)
19677       Vec = Shuffles.size() - 1;
19678     else
19679       Vec = (Vec - 1) / 2;
19680 
19681   // More than one shuffle. Generate a binary tree of blends, e.g. if from
19682   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
19683   // generate:
19684   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
19685   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
19686   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
19687   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
19688   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
19689   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
19690   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
19691 
19692   // Make sure the initial size of the shuffle list is even.
19693   if (Shuffles.size() % 2)
19694     Shuffles.push_back(DAG.getUNDEF(VT));
19695 
19696   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
19697     if (CurSize % 2) {
19698       Shuffles[CurSize] = DAG.getUNDEF(VT);
19699       CurSize++;
19700     }
19701     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
19702       int Left = 2 * In;
19703       int Right = 2 * In + 1;
19704       SmallVector<int, 8> Mask(NumElems, -1);
19705       for (unsigned i = 0; i != NumElems; ++i) {
19706         if (VectorMask[i] == Left) {
19707           Mask[i] = i;
19708           VectorMask[i] = In;
19709         } else if (VectorMask[i] == Right) {
19710           Mask[i] = i + NumElems;
19711           VectorMask[i] = In;
19712         }
19713       }
19714 
19715       Shuffles[In] =
19716           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
19717     }
19718   }
19719   return Shuffles[0];
19720 }
19721 
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
19724 // TODO: Support sign extend?
19725 // TODO: Allow undef elements?
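//
// For example (illustrative):
//   (v2i64 (build_vector
//            (i64 (zero_extend (i32 (extract_vector_elt v4i32:X, 0)))),
//            (i64 (zero_extend (i32 (extract_vector_elt v4i32:X, 1))))))
//     -> (v2i64 (zero_extend (v2i32 (extract_subvector X, 0))))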
19726 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
19727   if (LegalOperations)
19728     return SDValue();
19729 
19730   EVT VT = N->getValueType(0);
19731 
19732   bool FoundZeroExtend = false;
19733   SDValue Op0 = N->getOperand(0);
19734   auto checkElem = [&](SDValue Op) -> int64_t {
19735     unsigned Opc = Op.getOpcode();
19736     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
19737     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
19738         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19739         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
19740       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
19741         return C->getZExtValue();
19742     return -1;
19743   };
19744 
19745   // Make sure the first element matches
19746   // (zext (extract_vector_elt X, C))
19747   // Offset must be a constant multiple of the
19748   // known-minimum vector length of the result type.
19749   int64_t Offset = checkElem(Op0);
19750   if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
19751     return SDValue();
19752 
19753   unsigned NumElems = N->getNumOperands();
19754   SDValue In = Op0.getOperand(0).getOperand(0);
19755   EVT InSVT = In.getValueType().getScalarType();
19756   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
19757 
19758   // Don't create an illegal input type after type legalization.
19759   if (LegalTypes && !TLI.isTypeLegal(InVT))
19760     return SDValue();
19761 
19762   // Ensure all the elements come from the same vector and are adjacent.
19763   for (unsigned i = 1; i != NumElems; ++i) {
19764     if ((Offset + i) != checkElem(N->getOperand(i)))
19765       return SDValue();
19766   }
19767 
19768   SDLoc DL(N);
19769   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
19770                    Op0.getOperand(0).getOperand(1));
19771   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
19772                      VT, In);
19773 }
19774 
19775 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
19776   EVT VT = N->getValueType(0);
19777 
19778   // A vector built entirely of undefs is undef.
19779   if (ISD::allOperandsUndef(N))
19780     return DAG.getUNDEF(VT);
19781 
19782   // If this is a splat of a bitcast from another vector, change to a
19783   // concat_vector.
19784   // For example:
19785   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
19786   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
19787   //
19788   // If X is a build_vector itself, the concat can become a larger build_vector.
19789   // TODO: Maybe this is useful for non-splat too?
19790   if (!LegalOperations) {
19791     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19792       Splat = peekThroughBitcasts(Splat);
19793       EVT SrcVT = Splat.getValueType();
19794       if (SrcVT.isVector()) {
19795         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
19796         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
19797                                      SrcVT.getVectorElementType(), NumElts);
19798         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
19799           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
19800           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
19801                                        NewVT, Ops);
19802           return DAG.getBitcast(VT, Concat);
19803         }
19804       }
19805     }
19806   }
19807 
  // Check if we can express BUILD_VECTOR via subvector extract.
19809   if (!LegalTypes && (N->getNumOperands() > 1)) {
19810     SDValue Op0 = N->getOperand(0);
19811     auto checkElem = [&](SDValue Op) -> uint64_t {
19812       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
19813           (Op0.getOperand(0) == Op.getOperand(0)))
19814         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
19815           return CNode->getZExtValue();
19816       return -1;
19817     };
19818 
19819     int Offset = checkElem(Op0);
19820     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
19821       if (Offset + i != checkElem(N->getOperand(i))) {
19822         Offset = -1;
19823         break;
19824       }
19825     }
19826 
19827     if ((Offset == 0) &&
19828         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
19829       return Op0.getOperand(0);
19830     if ((Offset != -1) &&
19831         ((Offset % N->getValueType(0).getVectorNumElements()) ==
19832          0)) // IDX must be multiple of output size.
19833       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
19834                          Op0.getOperand(0), Op0.getOperand(1));
19835   }
19836 
19837   if (SDValue V = convertBuildVecZextToZext(N))
19838     return V;
19839 
19840   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
19841     return V;
19842 
19843   if (SDValue V = reduceBuildVecTruncToBitCast(N))
19844     return V;
19845 
19846   if (SDValue V = reduceBuildVecToShuffle(N))
19847     return V;
19848 
19849   // A splat of a single element is a SPLAT_VECTOR if supported on the target.
19850   // Do this late as some of the above may replace the splat.
19851   if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
19852     if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19853       assert(!V.isUndef() && "Splat of undef should have been handled earlier");
19854       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
19855     }
19856 
19857   return SDValue();
19858 }
19859 
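// Fold a CONCAT_VECTORS whose operands are all bitcasts from scalars (or
// undef) into a bitcast of one wider BUILD_VECTOR of those scalars.
// For example (illustrative, assuming v2i32 is not a legal operand type):
//   (v4i32 (concat_vectors (v2i32 (bitcast i64:a)), (v2i32 (bitcast i64:b))))
//     -> (v4i32 (bitcast (v2i64 (build_vector i64:a, i64:b))))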
19860 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
19861   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19862   EVT OpVT = N->getOperand(0).getValueType();
19863 
19864   // If the operands are legal vectors, leave them alone.
19865   if (TLI.isTypeLegal(OpVT))
19866     return SDValue();
19867 
19868   SDLoc DL(N);
19869   EVT VT = N->getValueType(0);
19870   SmallVector<SDValue, 8> Ops;
19871 
19872   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
19873   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19874 
19875   // Keep track of what we encounter.
19876   bool AnyInteger = false;
19877   bool AnyFP = false;
19878   for (const SDValue &Op : N->ops()) {
19879     if (ISD::BITCAST == Op.getOpcode() &&
19880         !Op.getOperand(0).getValueType().isVector())
19881       Ops.push_back(Op.getOperand(0));
19882     else if (ISD::UNDEF == Op.getOpcode())
19883       Ops.push_back(ScalarUndef);
19884     else
19885       return SDValue();
19886 
19887     // Note whether we encounter an integer or floating point scalar.
19888     // If it's neither, bail out, it could be something weird like x86mmx.
19889     EVT LastOpVT = Ops.back().getValueType();
19890     if (LastOpVT.isFloatingPoint())
19891       AnyFP = true;
19892     else if (LastOpVT.isInteger())
19893       AnyInteger = true;
19894     else
19895       return SDValue();
19896   }
19897 
19898   // If any of the operands is a floating point scalar bitcast to a vector,
19899   // use floating point types throughout, and bitcast everything.
19900   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
19901   if (AnyFP) {
19902     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
19903     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19904     if (AnyInteger) {
19905       for (SDValue &Op : Ops) {
19906         if (Op.getValueType() == SVT)
19907           continue;
19908         if (Op.isUndef())
19909           Op = ScalarUndef;
19910         else
19911           Op = DAG.getBitcast(SVT, Op);
19912       }
19913     }
19914   }
19915 
19916   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
19917                                VT.getSizeInBits() / SVT.getSizeInBits());
19918   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
19919 }
19920 
19921 // Attempt to merge nested concat_vectors/undefs.
19922 // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
19923 //  --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
19924 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
19925                                                   SelectionDAG &DAG) {
19926   EVT VT = N->getValueType(0);
19927 
19928   // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
19929   EVT SubVT;
19930   SDValue FirstConcat;
19931   for (const SDValue &Op : N->ops()) {
19932     if (Op.isUndef())
19933       continue;
19934     if (Op.getOpcode() != ISD::CONCAT_VECTORS)
19935       return SDValue();
19936     if (!FirstConcat) {
19937       SubVT = Op.getOperand(0).getValueType();
19938       if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
19939         return SDValue();
19940       FirstConcat = Op;
19941       continue;
19942     }
19943     if (SubVT != Op.getOperand(0).getValueType())
19944       return SDValue();
19945   }
19946   assert(FirstConcat && "Concat of all-undefs found");
19947 
19948   SmallVector<SDValue> ConcatOps;
19949   for (const SDValue &Op : N->ops()) {
19950     if (Op.isUndef()) {
19951       ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
19952       continue;
19953     }
19954     ConcatOps.append(Op->op_begin(), Op->op_end());
19955   }
19956   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
19957 }
19958 
19959 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
19960 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
19961 // most two distinct vectors the same size as the result, attempt to turn this
19962 // into a legal shuffle.
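// For example (illustrative):
//   (v4i32 (concat_vectors (v2i32 (extract_subvector v4i32:A, 2)),
//                          (v2i32 (extract_subvector v4i32:B, 0))))
//     -> (v4i32 (vector_shuffle<2,3,4,5> A, B))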
19963 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
19964   EVT VT = N->getValueType(0);
19965   EVT OpVT = N->getOperand(0).getValueType();
19966 
19967   // We currently can't generate an appropriate shuffle for a scalable vector.
19968   if (VT.isScalableVector())
19969     return SDValue();
19970 
19971   int NumElts = VT.getVectorNumElements();
19972   int NumOpElts = OpVT.getVectorNumElements();
19973 
19974   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
19975   SmallVector<int, 8> Mask;
19976 
19977   for (SDValue Op : N->ops()) {
19978     Op = peekThroughBitcasts(Op);
19979 
19980     // UNDEF nodes convert to UNDEF shuffle mask values.
19981     if (Op.isUndef()) {
19982       Mask.append((unsigned)NumOpElts, -1);
19983       continue;
19984     }
19985 
19986     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19987       return SDValue();
19988 
19989     // What vector are we extracting the subvector from and at what index?
19990     SDValue ExtVec = Op.getOperand(0);
19991     int ExtIdx = Op.getConstantOperandVal(1);
19992 
19993     // We want the EVT of the original extraction to correctly scale the
19994     // extraction index.
19995     EVT ExtVT = ExtVec.getValueType();
19996     ExtVec = peekThroughBitcasts(ExtVec);
19997 
19998     // UNDEF nodes convert to UNDEF shuffle mask values.
19999     if (ExtVec.isUndef()) {
20000       Mask.append((unsigned)NumOpElts, -1);
20001       continue;
20002     }
20003 
20004     // Ensure that we are extracting a subvector from a vector the same
20005     // size as the result.
20006     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
20007       return SDValue();
20008 
20009     // Scale the subvector index to account for any bitcast.
20010     int NumExtElts = ExtVT.getVectorNumElements();
20011     if (0 == (NumExtElts % NumElts))
20012       ExtIdx /= (NumExtElts / NumElts);
20013     else if (0 == (NumElts % NumExtElts))
20014       ExtIdx *= (NumElts / NumExtElts);
20015     else
20016       return SDValue();
20017 
20018     // At most we can reference 2 inputs in the final shuffle.
20019     if (SV0.isUndef() || SV0 == ExtVec) {
20020       SV0 = ExtVec;
20021       for (int i = 0; i != NumOpElts; ++i)
20022         Mask.push_back(i + ExtIdx);
20023     } else if (SV1.isUndef() || SV1 == ExtVec) {
20024       SV1 = ExtVec;
20025       for (int i = 0; i != NumOpElts; ++i)
20026         Mask.push_back(i + ExtIdx + NumElts);
20027     } else {
20028       return SDValue();
20029     }
20030   }
20031 
20032   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20033   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
20034                                      DAG.getBitcast(VT, SV1), Mask, DAG);
20035 }
20036 
20037 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
20038   unsigned CastOpcode = N->getOperand(0).getOpcode();
20039   switch (CastOpcode) {
20040   case ISD::SINT_TO_FP:
20041   case ISD::UINT_TO_FP:
20042   case ISD::FP_TO_SINT:
20043   case ISD::FP_TO_UINT:
20044     // TODO: Allow more opcodes?
20045     //  case ISD::BITCAST:
20046     //  case ISD::TRUNCATE:
20047     //  case ISD::ZERO_EXTEND:
20048     //  case ISD::SIGN_EXTEND:
20049     //  case ISD::FP_EXTEND:
20050     break;
20051   default:
20052     return SDValue();
20053   }
20054 
20055   EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
20056   if (!SrcVT.isVector())
20057     return SDValue();
20058 
20059   // All operands of the concat must be the same kind of cast from the same
20060   // source type.
20061   SmallVector<SDValue, 4> SrcOps;
20062   for (SDValue Op : N->ops()) {
20063     if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
20064         Op.getOperand(0).getValueType() != SrcVT)
20065       return SDValue();
20066     SrcOps.push_back(Op.getOperand(0));
20067   }
20068 
20069   // The wider cast must be supported by the target. This is unusual because
20070   // the operation support type parameter depends on the opcode. In addition,
20071   // check the other type in the cast to make sure this is really legal.
20072   EVT VT = N->getValueType(0);
20073   EVT SrcEltVT = SrcVT.getVectorElementType();
20074   ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
20075   EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
20076   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20077   switch (CastOpcode) {
20078   case ISD::SINT_TO_FP:
20079   case ISD::UINT_TO_FP:
20080     if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
20081         !TLI.isTypeLegal(VT))
20082       return SDValue();
20083     break;
20084   case ISD::FP_TO_SINT:
20085   case ISD::FP_TO_UINT:
20086     if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
20087         !TLI.isTypeLegal(ConcatSrcVT))
20088       return SDValue();
20089     break;
20090   default:
20091     llvm_unreachable("Unexpected cast opcode");
20092   }
20093 
20094   // concat (cast X), (cast Y)... -> cast (concat X, Y...)
20095   SDLoc DL(N);
20096   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
20097   return DAG.getNode(CastOpcode, DL, VT, NewConcat);
20098 }
20099 
20100 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
20101   // If we only have one input vector, we don't need to do any concatenation.
20102   if (N->getNumOperands() == 1)
20103     return N->getOperand(0);
20104 
20105   // Check if all of the operands are undefs.
20106   EVT VT = N->getValueType(0);
20107   if (ISD::allOperandsUndef(N))
20108     return DAG.getUNDEF(VT);
20109 
20110   // Optimize concat_vectors where all but the first of the vectors are undef.
20111   if (all_of(drop_begin(N->ops()),
20112              [](const SDValue &Op) { return Op.isUndef(); })) {
20113     SDValue In = N->getOperand(0);
20114     assert(In.getValueType().isVector() && "Must concat vectors");
20115 
20116     // If the input is a concat_vectors, just make a larger concat by padding
20117     // with smaller undefs.
20118     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
20119       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
20120       SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
20121       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
20122       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20123     }
20124 
20125     SDValue Scalar = peekThroughOneUseBitcasts(In);
20126 
20127     // concat_vectors(scalar_to_vector(scalar), undef) ->
20128     //     scalar_to_vector(scalar)
20129     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
20130          Scalar.hasOneUse()) {
20131       EVT SVT = Scalar.getValueType().getVectorElementType();
20132       if (SVT == Scalar.getOperand(0).getValueType())
20133         Scalar = Scalar.getOperand(0);
20134     }
20135 
20136     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
20137     if (!Scalar.getValueType().isVector()) {
20138       // If the bitcast type isn't legal, it might be a trunc of a legal type;
20139       // look through the trunc so we can still do the transform:
20140       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
20141       if (Scalar->getOpcode() == ISD::TRUNCATE &&
20142           !TLI.isTypeLegal(Scalar.getValueType()) &&
20143           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
20144         Scalar = Scalar->getOperand(0);
20145 
20146       EVT SclTy = Scalar.getValueType();
20147 
20148       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
20149         return SDValue();
20150 
20151       // Bail out if the vector size is not a multiple of the scalar size.
20152       if (VT.getSizeInBits() % SclTy.getSizeInBits())
20153         return SDValue();
20154 
20155       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
20156       if (VNTNumElms < 2)
20157         return SDValue();
20158 
20159       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
20160       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
20161         return SDValue();
20162 
20163       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
20164       return DAG.getBitcast(VT, Res);
20165     }
20166   }
20167 
20168   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF-only concatenation.
20170   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
20171   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
20172   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
20173     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
20174   };
20175   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
20176     SmallVector<SDValue, 8> Opnds;
20177     EVT SVT = VT.getScalarType();
20178 
20179     EVT MinVT = SVT;
20180     if (!SVT.isFloatingPoint()) {
      // If the BUILD_VECTORs are built from integers, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
20183       bool FoundMinVT = false;
20184       for (const SDValue &Op : N->ops())
20185         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20186           EVT OpSVT = Op.getOperand(0).getValueType();
20187           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
20188           FoundMinVT = true;
20189         }
20190       assert(FoundMinVT && "Concat vector type mismatch");
20191     }
20192 
20193     for (const SDValue &Op : N->ops()) {
20194       EVT OpVT = Op.getValueType();
20195       unsigned NumElts = OpVT.getVectorNumElements();
20196 
20197       if (ISD::UNDEF == Op.getOpcode())
20198         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
20199 
20200       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20201         if (SVT.isFloatingPoint()) {
20202           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
20203           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
20204         } else {
20205           for (unsigned i = 0; i != NumElts; ++i)
20206             Opnds.push_back(
20207                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
20208         }
20209       }
20210     }
20211 
20212     assert(VT.getVectorNumElements() == Opnds.size() &&
20213            "Concat vector type mismatch");
20214     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
20215   }
20216 
20217   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
20218   // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
20219   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
20220     return V;
20221 
20222   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
20223     // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
20224     if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
20225       return V;
20226 
20227     // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
20228     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
20229       return V;
20230   }
20231 
20232   if (SDValue V = combineConcatVectorOfCasts(N, DAG))
20233     return V;
20234 
  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
  // operands and look for CONCAT operations that place the incoming vectors
  // at the exact same location.
20239   //
20240   // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
20241   SDValue SingleSource = SDValue();
20242   unsigned PartNumElem =
20243       N->getOperand(0).getValueType().getVectorMinNumElements();
20244 
20245   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20246     SDValue Op = N->getOperand(i);
20247 
20248     if (Op.isUndef())
20249       continue;
20250 
20251     // Check if this is the identity extract:
20252     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20253       return SDValue();
20254 
20255     // Find the single incoming vector for the extract_subvector.
20256     if (SingleSource.getNode()) {
20257       if (Op.getOperand(0) != SingleSource)
20258         return SDValue();
20259     } else {
20260       SingleSource = Op.getOperand(0);
20261 
      // Check that the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we cannot
      // optimize it away.
20265       if (SingleSource.getValueType() != N->getValueType(0))
20266         return SDValue();
20267     }
20268 
20269     // Check that we are reading from the identity index.
20270     unsigned IdentityIndex = i * PartNumElem;
20271     if (Op.getConstantOperandAPInt(1) != IdentityIndex)
20272       return SDValue();
20273   }
20274 
20275   if (SingleSource.getNode())
20276     return SingleSource;
20277 
20278   return SDValue();
20279 }
20280 
// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to see whether
// the requested subvector can be sourced for free.
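// For example (illustrative), with SubVT = v4i32 and Index = 4:
//   (v8i32 (concat_vectors v4i32:X, v4i32:Y)) yields Y.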
20283 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
20284   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
20285       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
20286     return V.getOperand(1);
20287   }
20288   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
20289   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
20290       V.getOperand(0).getValueType() == SubVT &&
20291       (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
20292     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
20293     return V.getOperand(SubIdx);
20294   }
20295   return SDValue();
20296 }
20297 
20298 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
20299                                               SelectionDAG &DAG,
20300                                               bool LegalOperations) {
20301   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20302   SDValue BinOp = Extract->getOperand(0);
20303   unsigned BinOpcode = BinOp.getOpcode();
20304   if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
20305     return SDValue();
20306 
20307   EVT VecVT = BinOp.getValueType();
20308   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
20309   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
20310     return SDValue();
20311 
20312   SDValue Index = Extract->getOperand(1);
20313   EVT SubVT = Extract->getValueType(0);
20314   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
20315     return SDValue();
20316 
20317   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
20318   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
20319 
20320   // TODO: We could handle the case where only 1 operand is being inserted by
20321   //       creating an extract of the other operand, but that requires checking
20322   //       number of uses and/or costs.
20323   if (!Sub0 || !Sub1)
20324     return SDValue();
20325 
20326   // We are inserting both operands of the wide binop only to extract back
20327   // to the narrow vector size. Eliminate all of the insert/extract:
20328   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
20329   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
20330                      BinOp->getFlags());
20331 }
20332 
20333 /// If we are extracting a subvector produced by a wide binary operator try
20334 /// to use a narrow binary operator and/or avoid concatenation and extraction.
20335 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
20336                                           bool LegalOperations) {
20337   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
20338   // some of these bailouts with other transforms.
20339 
20340   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
20341     return V;
20342 
20343   // The extract index must be a constant, so we can map it to a concat operand.
20344   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20345   if (!ExtractIndexC)
20346     return SDValue();
20347 
20348   // We are looking for an optionally bitcasted wide vector binary operator
20349   // feeding an extract subvector.
20350   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20351   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
20352   unsigned BOpcode = BinOp.getOpcode();
20353   if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
20354     return SDValue();
20355 
20356   // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
20357   // reduced to the unary fneg when it is visited, and we probably want to deal
20358   // with fneg in a target-specific way.
20359   if (BOpcode == ISD::FSUB) {
20360     auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
20361     if (C && C->getValueAPF().isNegZero())
20362       return SDValue();
20363   }
20364 
20365   // The binop must be a vector type, so we can extract some fraction of it.
20366   EVT WideBVT = BinOp.getValueType();
20367   // The optimisations below currently assume we are dealing with fixed length
20368   // vectors. It is possible to add support for scalable vectors, but at the
20369   // moment we've done no analysis to prove whether they are profitable or not.
20370   if (!WideBVT.isFixedLengthVector())
20371     return SDValue();
20372 
20373   EVT VT = Extract->getValueType(0);
20374   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
20375   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
20376          "Extract index is not a multiple of the vector length.");
20377 
20378   // Bail out if this is not a proper multiple width extraction.
20379   unsigned WideWidth = WideBVT.getSizeInBits();
20380   unsigned NarrowWidth = VT.getSizeInBits();
20381   if (WideWidth % NarrowWidth != 0)
20382     return SDValue();
20383 
20384   // Bail out if we are extracting a fraction of a single operation. This can
20385   // occur because we potentially looked through a bitcast of the binop.
20386   unsigned NarrowingRatio = WideWidth / NarrowWidth;
20387   unsigned WideNumElts = WideBVT.getVectorNumElements();
20388   if (WideNumElts % NarrowingRatio != 0)
20389     return SDValue();
20390 
20391   // Bail out if the target does not support a narrower version of the binop.
20392   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
20393                                    WideNumElts / NarrowingRatio);
20394   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
20395     return SDValue();
20396 
20397   // If extraction is cheap, we don't need to look at the binop operands
20398   // for concat ops. The narrow binop alone makes this transform profitable.
20399   // We can't just reuse the original extract index operand because we may have
20400   // bitcasted.
20401   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
20402   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
20403   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
20404       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
20405     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
20406     SDLoc DL(Extract);
20407     SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20408     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20409                             BinOp.getOperand(0), NewExtIndex);
20410     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20411                             BinOp.getOperand(1), NewExtIndex);
20412     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
20413                                       BinOp.getNode()->getFlags());
20414     return DAG.getBitcast(VT, NarrowBinOp);
20415   }
20416 
20417   // Only handle the case where we are doubling and then halving. A larger ratio
20418   // may require more than two narrow binops to replace the wide binop.
20419   if (NarrowingRatio != 2)
20420     return SDValue();
20421 
20422   // TODO: The motivating case for this transform is an x86 AVX1 target. That
20423   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
20424   // flavors, but no other 256-bit integer support. This could be extended to
20425   // handle any binop, but that may require fixing/adding other folds to avoid
20426   // codegen regressions.
20427   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
20428     return SDValue();
20429 
20430   // We need at least one concatenation operation of a binop operand to make
20431   // this transform worthwhile. The concat must double the input vector sizes.
20432   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
20433     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
20434       return V.getOperand(ConcatOpNum);
20435     return SDValue();
20436   };
20437   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
20438   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
20439 
20440   if (SubVecL || SubVecR) {
20441     // If a binop operand was not the result of a concat, we must extract a
20442     // half-sized operand for our new narrow binop:
20443     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
20444     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
20445     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
20446     SDLoc DL(Extract);
20447     SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20448     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
20449                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20450                                       BinOp.getOperand(0), IndexC);
20451 
20452     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
20453                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20454                                       BinOp.getOperand(1), IndexC);
20455 
20456     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
20457     return DAG.getBitcast(VT, NarrowBinOp);
20458   }
20459 
20460   return SDValue();
20461 }
20462 
20463 /// If we are extracting a subvector from a wide vector load, convert to a
20464 /// narrow load to eliminate the extraction:
20465 /// (extract_subvector (load wide vector)) --> (load narrow vector)
20466 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
20467   // TODO: Add support for big-endian. The offset calculation must be adjusted.
20468   if (DAG.getDataLayout().isBigEndian())
20469     return SDValue();
20470 
20471   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
20472   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || !ExtIdx)
    return SDValue();
20475     return SDValue();
20476 
20477   // Allow targets to opt-out.
20478   EVT VT = Extract->getValueType(0);
20479 
20480   // We can only create byte sized loads.
20481   if (!VT.isByteSized())
20482     return SDValue();
20483 
20484   unsigned Index = ExtIdx->getZExtValue();
20485   unsigned NumElts = VT.getVectorMinNumElements();
20486 
20487   // The definition of EXTRACT_SUBVECTOR states that the index must be a
20488   // multiple of the minimum number of elements in the result type.
20489   assert(Index % NumElts == 0 && "The extract subvector index is not a "
20490                                  "multiple of the result's element count");
20491 
20492   // It's fine to use TypeSize here as we know the offset will not be negative.
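  // e.g. extracting v2f64 at index 4 from v8f64:
  // Offset = 16 bytes * (4 / 2) = 32 bytes.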
20493   TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
20494 
20495   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20496   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
20497     return SDValue();
20498 
20499   // The narrow load will be offset from the base address of the old load if
20500   // we are extracting from something besides index 0 (little-endian).
20501   SDLoc DL(Extract);
20502 
20503   // TODO: Use "BaseIndexOffset" to make this more effective.
20504   SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
20505 
20506   uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
20507   MachineFunction &MF = DAG.getMachineFunction();
20508   MachineMemOperand *MMO;
20509   if (Offset.isScalable()) {
20510     MachinePointerInfo MPI =
20511         MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
20512     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
20513   } else
20514     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
20515                                   StoreSize);
20516 
20517   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
20518   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
20519   return NewLd;
20520 }
20521 
20522 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
20523   EVT NVT = N->getValueType(0);
20524   SDValue V = N->getOperand(0);
20525   uint64_t ExtIdx = N->getConstantOperandVal(1);
20526 
20527   // Extract from UNDEF is UNDEF.
20528   if (V.isUndef())
20529     return DAG.getUNDEF(NVT);
20530 
20531   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
20532     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
20533       return NarrowLoad;
20534 
20535   // Combine an extract of an extract into a single extract_subvector.
20536   // ext (ext X, C), 0 --> ext X, C
20537   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
20538     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
20539                                     V.getConstantOperandVal(1)) &&
20540         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
20541       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
20542                          V.getOperand(1));
20543     }
20544   }
20545 
20546   // Try to move vector bitcast after extract_subv by scaling extraction index:
20547   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
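  // e.g. extract_subv (v2i64 bitcast (v4i32 X)), 1 -->
  //      bitcast (v2i32 extract_subv X, 2)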
20548   if (V.getOpcode() == ISD::BITCAST &&
20549       V.getOperand(0).getValueType().isVector() &&
20550       (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
20551     SDValue SrcOp = V.getOperand(0);
20552     EVT SrcVT = SrcOp.getValueType();
20553     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
20554     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
20555     if ((SrcNumElts % DestNumElts) == 0) {
20556       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
20557       ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
20558       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
20559                                       NewExtEC);
20560       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20561         SDLoc DL(N);
20562         SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
20563         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20564                                          V.getOperand(0), NewIndex);
20565         return DAG.getBitcast(NVT, NewExtract);
20566       }
20567     }
20568     if ((DestNumElts % SrcNumElts) == 0) {
20569       unsigned DestSrcRatio = DestNumElts / SrcNumElts;
20570       if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
20571         ElementCount NewExtEC =
20572             NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
20573         EVT ScalarVT = SrcVT.getScalarType();
20574         if ((ExtIdx % DestSrcRatio) == 0) {
20575           SDLoc DL(N);
20576           unsigned IndexValScaled = ExtIdx / DestSrcRatio;
20577           EVT NewExtVT =
20578               EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
20579           if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20580             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20581             SDValue NewExtract =
20582                 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20583                             V.getOperand(0), NewIndex);
20584             return DAG.getBitcast(NVT, NewExtract);
20585           }
20586           if (NewExtEC.isScalar() &&
20587               TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
20588             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20589             SDValue NewExtract =
20590                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
20591                             V.getOperand(0), NewIndex);
20592             return DAG.getBitcast(NVT, NewExtract);
20593           }
20594         }
20595       }
20596     }
20597   }
20598 
20599   if (V.getOpcode() == ISD::CONCAT_VECTORS) {
20600     unsigned ExtNumElts = NVT.getVectorMinNumElements();
20601     EVT ConcatSrcVT = V.getOperand(0).getValueType();
20602     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
20603            "Concat and extract subvector do not change element type");
20604     assert((ExtIdx % ExtNumElts) == 0 &&
20605            "Extract index is not a multiple of the input vector length.");
20606 
20607     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
20608     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
20609 
20610     // If the concatenated source types match this extract, it's a direct
20611     // simplification:
20612     // extract_subvec (concat V1, V2, ...), i --> Vi
20613     if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
20614       return V.getOperand(ConcatOpIdx);
20615 
20616     // If the concatenated source vectors are a multiple length of this extract,
20617     // then extract a fraction of one of those source vectors directly from a
20618     // concat operand. Example:
    //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
    //   v2i8 extract_subvec (v8i8 Y), 6
20621     if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
20622         ConcatSrcNumElts % ExtNumElts == 0) {
20623       SDLoc DL(N);
20624       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
20625       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
20626              "Trying to extract from >1 concat operand?");
20627       assert(NewExtIdx % ExtNumElts == 0 &&
20628              "Extract index is not a multiple of the input vector length.");
20629       SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
20630       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
20631                          V.getOperand(ConcatOpIdx), NewIndexC);
20632     }
20633   }
20634 
20635   V = peekThroughBitcasts(V);
20636 
  // If the input is a build vector, try to make a smaller build vector.
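  // e.g. v4i32 extract_subvec (v8i32 build_vector), 4 -->
  //      v4i32 build_vector of operands 4..7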
20638   if (V.getOpcode() == ISD::BUILD_VECTOR) {
20639     EVT InVT = V.getValueType();
20640     unsigned ExtractSize = NVT.getSizeInBits();
20641     unsigned EltSize = InVT.getScalarSizeInBits();
20642     // Only do this if we won't split any elements.
20643     if (ExtractSize % EltSize == 0) {
20644       unsigned NumElems = ExtractSize / EltSize;
20645       EVT EltVT = InVT.getVectorElementType();
20646       EVT ExtractVT =
20647           NumElems == 1 ? EltVT
20648                         : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
20649       if ((Level < AfterLegalizeDAG ||
20650            (NumElems == 1 ||
20651             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
20652           (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
20653         unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
20654 
20655         if (NumElems == 1) {
20656           SDValue Src = V->getOperand(IdxVal);
20657           if (EltVT != Src.getValueType())
            Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
20659           return DAG.getBitcast(NVT, Src);
20660         }
20661 
20662         // Extract the pieces from the original build_vector.
20663         SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
20664                                               V->ops().slice(IdxVal, NumElems));
20665         return DAG.getBitcast(NVT, BuildVec);
20666       }
20667     }
20668   }
20669 
20670   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
20671     // Handle only simple case where vector being inserted and vector
20672     // being extracted are of same size.
20673     EVT SmallVT = V.getOperand(1).getValueType();
20674     if (!NVT.bitsEq(SmallVT))
20675       return SDValue();
20676 
20677     // Combine:
20678     //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
20679     // Into:
    //    indices are equal or bit offsets are equal => V2
20681     //    otherwise => (extract_subvec V1, ExtIdx)
20682     uint64_t InsIdx = V.getConstantOperandVal(2);
20683     if (InsIdx * SmallVT.getScalarSizeInBits() ==
20684         ExtIdx * NVT.getScalarSizeInBits()) {
20685       if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
20686         return SDValue();
20687 
20688       return DAG.getBitcast(NVT, V.getOperand(1));
20689     }
20690     return DAG.getNode(
20691         ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
20692         DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
20693         N->getOperand(1));
20694   }
20695 
20696   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
20697     return NarrowBOp;
20698 
20699   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20700     return SDValue(N, 0);
20701 
20702   return SDValue();
20703 }
20704 
20705 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
20706 /// followed by concatenation. Narrow vector ops may have better performance
20707 /// than wide ops, and this can unlock further narrowing of other vector ops.
20708 /// Targets can invert this transform later if it is not profitable.
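/// e.g. shuffle (concat X, undef), (concat Y, undef), <0,5,1,4> -->
///      concat (shuffle X, Y, <0,3>), (shuffle X, Y, <1,2>)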
20709 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
20710                                          SelectionDAG &DAG) {
20711   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
20712   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
20713       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
20714       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
20715     return SDValue();
20716 
20717   // Split the wide shuffle mask into halves. Any mask element that is accessing
20718   // operand 1 is offset down to account for narrowing of the vectors.
20719   ArrayRef<int> Mask = Shuf->getMask();
20720   EVT VT = Shuf->getValueType(0);
20721   unsigned NumElts = VT.getVectorNumElements();
20722   unsigned HalfNumElts = NumElts / 2;
20723   SmallVector<int, 16> Mask0(HalfNumElts, -1);
20724   SmallVector<int, 16> Mask1(HalfNumElts, -1);
20725   for (unsigned i = 0; i != NumElts; ++i) {
20726     if (Mask[i] == -1)
20727       continue;
20728     // If we reference the upper (undef) subvector then the element is undef.
20729     if ((Mask[i] % NumElts) >= HalfNumElts)
20730       continue;
20731     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
20732     if (i < HalfNumElts)
20733       Mask0[i] = M;
20734     else
20735       Mask1[i - HalfNumElts] = M;
20736   }
20737 
20738   // Ask the target if this is a valid transform.
20739   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20740   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
20741                                 HalfNumElts);
20742   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
20743       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
20744     return SDValue();
20745 
20746   // shuffle (concat X, undef), (concat Y, undef), Mask -->
20747   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
20748   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
20749   SDLoc DL(Shuf);
20750   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
20751   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
20752   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
20753 }
20754 
20755 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
20756 // or turn a shuffle of a single concat into simpler shuffle then concat.
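// e.g. with 2 elements per concat operand:
// shuffle (concat A, B), (concat C, D), <4,5,2,3> --> concat C, B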
20757 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
20758   EVT VT = N->getValueType(0);
20759   unsigned NumElts = VT.getVectorNumElements();
20760 
20761   SDValue N0 = N->getOperand(0);
20762   SDValue N1 = N->getOperand(1);
20763   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20764   ArrayRef<int> Mask = SVN->getMask();
20765 
20766   SmallVector<SDValue, 4> Ops;
20767   EVT ConcatVT = N0.getOperand(0).getValueType();
20768   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
20769   unsigned NumConcats = NumElts / NumElemsPerConcat;
20770 
20771   auto IsUndefMaskElt = [](int i) { return i == -1; };
20772 
20773   // Special case: shuffle(concat(A,B)) can be more efficiently represented
20774   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
20775   // half vector elements.
20776   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
20777       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
20778                    IsUndefMaskElt)) {
20779     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
20780                               N0.getOperand(1),
20781                               Mask.slice(0, NumElemsPerConcat));
20782     N1 = DAG.getUNDEF(ConcatVT);
20783     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
20784   }
20785 
  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector.
20788   for (unsigned I = 0; I != NumConcats; ++I) {
20789     unsigned Begin = I * NumElemsPerConcat;
20790     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
20791 
20792     // Make sure we're dealing with a copy.
20793     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
20794       Ops.push_back(DAG.getUNDEF(ConcatVT));
20795       continue;
20796     }
20797 
20798     int OpIdx = -1;
20799     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
20800       if (IsUndefMaskElt(SubMask[i]))
20801         continue;
20802       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
20803         return SDValue();
20804       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
20805       if (0 <= OpIdx && EltOpIdx != OpIdx)
20806         return SDValue();
20807       OpIdx = EltOpIdx;
20808     }
20809     assert(0 <= OpIdx && "Unknown concat_vectors op");
20810 
20811     if (OpIdx < (int)N0.getNumOperands())
20812       Ops.push_back(N0.getOperand(OpIdx));
20813     else
20814       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
20815   }
20816 
20817   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20818 }
20819 
20820 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20821 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20822 //
20823 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
20824 // a simplification in some sense, but it isn't appropriate in general: some
20825 // BUILD_VECTORs are substantially cheaper than others. The general case
20826 // of a BUILD_VECTOR requires inserting each element individually (or
20827 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
20828 // all constants is a single constant pool load.  A BUILD_VECTOR where each
20829 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
20830 // are undef lowers to a small number of element insertions.
20831 //
20832 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
20833 // We don't fold shuffles where one side is a non-zero constant, and we don't
20834 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
20835 // non-constant operands. This seems to work out reasonably well in practice.
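// e.g. (assuming the operands pass the one-use and constant heuristics above)
// shuffle (build_vector a,b,c,d), (build_vector e,f,g,h), <0,4,1,5>
//   --> build_vector a,e,b,f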
20836 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
20837                                        SelectionDAG &DAG,
20838                                        const TargetLowering &TLI) {
20839   EVT VT = SVN->getValueType(0);
20840   unsigned NumElts = VT.getVectorNumElements();
20841   SDValue N0 = SVN->getOperand(0);
20842   SDValue N1 = SVN->getOperand(1);
20843 
20844   if (!N0->hasOneUse())
20845     return SDValue();
20846 
  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
20849   if (!N1.isUndef()) {
20850     if (!N1->hasOneUse())
20851       return SDValue();
20852 
20853     bool N0AnyConst = isAnyConstantBuildVector(N0);
20854     bool N1AnyConst = isAnyConstantBuildVector(N1);
20855     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
20856       return SDValue();
20857     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
20858       return SDValue();
20859   }
20860 
20861   // If both inputs are splats of the same value then we can safely merge this
20862   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
20863   bool IsSplat = false;
20864   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
20865   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
20866   if (BV0 && BV1)
20867     if (SDValue Splat0 = BV0->getSplatValue())
20868       IsSplat = (Splat0 == BV1->getSplatValue());
20869 
20870   SmallVector<SDValue, 8> Ops;
20871   SmallSet<SDValue, 16> DuplicateOps;
20872   for (int M : SVN->getMask()) {
20873     SDValue Op = DAG.getUNDEF(VT.getScalarType());
20874     if (M >= 0) {
20875       int Idx = M < (int)NumElts ? M : M - NumElts;
20876       SDValue &S = (M < (int)NumElts ? N0 : N1);
20877       if (S.getOpcode() == ISD::BUILD_VECTOR) {
20878         Op = S.getOperand(Idx);
20879       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
20880         SDValue Op0 = S.getOperand(0);
20881         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
20882       } else {
20883         // Operand can't be combined - bail out.
20884         return SDValue();
20885       }
20886     }
20887 
20888     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
20889     // generating a splat; semantically, this is fine, but it's likely to
20890     // generate low-quality code if the target can't reconstruct an appropriate
20891     // shuffle.
20892     if (!Op.isUndef() && !isIntOrFPConstant(Op))
20893       if (!IsSplat && !DuplicateOps.insert(Op).second)
20894         return SDValue();
20895 
20896     Ops.push_back(Op);
20897   }
20898 
20899   // BUILD_VECTOR requires all inputs to be of the same type, find the
20900   // maximum type and extend them all.
20901   EVT SVT = VT.getScalarType();
20902   if (SVT.isInteger())
20903     for (SDValue &Op : Ops)
20904       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
20905   if (SVT != VT.getScalarType())
20906     for (SDValue &Op : Ops)
20907       Op = TLI.isZExtFree(Op.getValueType(), SVT)
20908                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
20909                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
20910   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
20911 }
20912 
20913 // Match shuffles that can be converted to any_vector_extend_in_reg.
20914 // This is often generated during legalization.
20915 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
20916 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
20917 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
20918                                             SelectionDAG &DAG,
20919                                             const TargetLowering &TLI,
20920                                             bool LegalOperations) {
20921   EVT VT = SVN->getValueType(0);
20922   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20923 
20924   // TODO Add support for big-endian when we have a test case.
20925   if (!VT.isInteger() || IsBigEndian)
20926     return SDValue();
20927 
20928   unsigned NumElts = VT.getVectorNumElements();
20929   unsigned EltSizeInBits = VT.getScalarSizeInBits();
20930   ArrayRef<int> Mask = SVN->getMask();
20931   SDValue N0 = SVN->getOperand(0);
20932 
20933   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
20934   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
20935     for (unsigned i = 0; i != NumElts; ++i) {
20936       if (Mask[i] < 0)
20937         continue;
20938       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
20939         continue;
20940       return false;
20941     }
20942     return true;
20943   };
20944 
  // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
  // power-of-2 extensions as they are the most likely.
20947   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
    // Check for non-power-of-2 vector sizes.
20949     if (NumElts % Scale != 0)
20950       continue;
20951     if (!isAnyExtend(Scale))
20952       continue;
20953 
20954     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
20955     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
20956     // Never create an illegal type. Only create unsupported operations if we
20957     // are pre-legalization.
20958     if (TLI.isTypeLegal(OutVT))
20959       if (!LegalOperations ||
20960           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
20961         return DAG.getBitcast(VT,
20962                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
20963                                           SDLoc(SVN), OutVT, N0));
20964   }
20965 
20966   return SDValue();
20967 }
20968 
20969 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
20970 // each source element of a large type into the lowest elements of a smaller
20971 // destination type. This is often generated during legalization.
20972 // If the source node itself was a '*_extend_vector_inreg' node then we should
20973 // then be able to remove it.
20974 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
20975                                         SelectionDAG &DAG) {
20976   EVT VT = SVN->getValueType(0);
20977   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20978 
20979   // TODO Add support for big-endian when we have a test case.
20980   if (!VT.isInteger() || IsBigEndian)
20981     return SDValue();
20982 
20983   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
20984 
20985   unsigned Opcode = N0.getOpcode();
20986   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
20987       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
20988       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
20989     return SDValue();
20990 
20991   SDValue N00 = N0.getOperand(0);
20992   ArrayRef<int> Mask = SVN->getMask();
20993   unsigned NumElts = VT.getVectorNumElements();
20994   unsigned EltSizeInBits = VT.getScalarSizeInBits();
20995   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
20996   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
20997 
20998   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
20999     return SDValue();
21000   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
21001 
  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
21003   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
21004   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
21005   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
21006     for (unsigned i = 0; i != NumElts; ++i) {
21007       if (Mask[i] < 0)
21008         continue;
21009       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
21010         continue;
21011       return false;
21012     }
21013     return true;
21014   };
21015 
21016   // At the moment we just handle the case where we've truncated back to the
21017   // same size as before the extension.
21018   // TODO: handle more extension/truncation cases as cases arise.
21019   if (EltSizeInBits != ExtSrcSizeInBits)
21020     return SDValue();
21021 
21022   // We can remove *extend_vector_inreg only if the truncation happens at
21023   // the same scale as the extension.
21024   if (isTruncate(ExtScale))
21025     return DAG.getBitcast(VT, N00);
21026 
21027   return SDValue();
21028 }
21029 
21030 // Combine shuffles of splat-shuffles of the form:
21031 // shuffle (shuffle V, undef, splat-mask), undef, M
21032 // If splat-mask contains undef elements, we need to be careful about
21033 // introducing undef's in the folded mask which are not the result of composing
21034 // the masks of the shuffles.
21035 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
21036                                         SelectionDAG &DAG) {
21037   if (!Shuf->getOperand(1).isUndef())
21038     return SDValue();
21039   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21040   if (!Splat || !Splat->isSplat())
21041     return SDValue();
21042 
21043   ArrayRef<int> ShufMask = Shuf->getMask();
21044   ArrayRef<int> SplatMask = Splat->getMask();
21045   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
21046 
21047   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
21048   // every undef mask element in the splat-shuffle has a corresponding undef
21049   // element in the user-shuffle's mask or if the composition of mask elements
21050   // would result in undef.
21051   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
21052   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
  //   In this case it is not legal to simplify to the splat-shuffle because
  //   we may expose an undef element at index 1 to users of the shuffle,
  //   which was not there before the combine.
21056   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
21057   //   In this case the composition of masks yields SplatMask, so it's ok to
21058   //   simplify to the splat-shuffle.
21059   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
21060   //   In this case the composed mask includes all undef elements of SplatMask
21061   //   and in addition sets element zero to undef. It is safe to simplify to
21062   //   the splat-shuffle.
21063   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
21064                                        ArrayRef<int> SplatMask) {
21065     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
21066       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
21067           SplatMask[UserMask[i]] != -1)
21068         return false;
21069     return true;
21070   };
21071   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
21072     return Shuf->getOperand(0);
21073 
21074   // Create a new shuffle with a mask that is composed of the two shuffles'
21075   // masks.
21076   SmallVector<int, 32> NewMask;
21077   for (int Idx : ShufMask)
21078     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
21079 
21080   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
21081                               Splat->getOperand(0), Splat->getOperand(1),
21082                               NewMask);
21083 }
21084 
21085 /// Combine shuffle of shuffle of the form:
21086 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
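/// e.g. shuf (shuf X, undef, <2,u,2,u>), undef, <u,0,1,u> -->
///      shuf X, undef, <u,2,u,u>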
21087 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
21088                                      SelectionDAG &DAG) {
21089   if (!OuterShuf->getOperand(1).isUndef())
21090     return SDValue();
21091   auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
21092   if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
21093     return SDValue();
21094 
21095   ArrayRef<int> OuterMask = OuterShuf->getMask();
21096   ArrayRef<int> InnerMask = InnerShuf->getMask();
21097   unsigned NumElts = OuterMask.size();
21098   assert(NumElts == InnerMask.size() && "Mask length mismatch");
21099   SmallVector<int, 32> CombinedMask(NumElts, -1);
21100   int SplatIndex = -1;
21101   for (unsigned i = 0; i != NumElts; ++i) {
21102     // Undef lanes remain undef.
21103     int OuterMaskElt = OuterMask[i];
21104     if (OuterMaskElt == -1)
21105       continue;
21106 
21107     // Peek through the shuffle masks to get the underlying source element.
21108     int InnerMaskElt = InnerMask[OuterMaskElt];
21109     if (InnerMaskElt == -1)
21110       continue;
21111 
21112     // Initialize the splatted element.
21113     if (SplatIndex == -1)
21114       SplatIndex = InnerMaskElt;
21115 
21116     // Non-matching index - this is not a splat.
21117     if (SplatIndex != InnerMaskElt)
21118       return SDValue();
21119 
21120     CombinedMask[i] = InnerMaskElt;
21121   }
21122   assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
21123           getSplatIndex(CombinedMask) != -1) &&
21124          "Expected a splat mask");
21125 
21126   // TODO: The transform may be a win even if the mask is not legal.
21127   EVT VT = OuterShuf->getValueType(0);
21128   assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
21129   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
21130     return SDValue();
21131 
21132   return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
21133                               InnerShuf->getOperand(1), CombinedMask);
21134 }
21135 
21136 /// If the shuffle mask is taking exactly one element from the first vector
21137 /// operand and passing through all other elements from the second vector
21138 /// operand, return the index of the mask element that is choosing an element
21139 /// from the first operand. Otherwise, return -1.
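/// e.g. <4,1,6,7> returns 1 (only element 1 comes from operand 0), while
/// <4,1,6,3> returns -1 (two elements come from operand 0).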
21140 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
21141   int MaskSize = Mask.size();
21142   int EltFromOp0 = -1;
21143   // TODO: This does not match if there are undef elements in the shuffle mask.
21144   // Should we ignore undefs in the shuffle mask instead? The trade-off is
21145   // removing an instruction (a shuffle), but losing the knowledge that some
21146   // vector lanes are not needed.
21147   for (int i = 0; i != MaskSize; ++i) {
21148     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
21149       // We're looking for a shuffle of exactly one element from operand 0.
21150       if (EltFromOp0 != -1)
21151         return -1;
21152       EltFromOp0 = i;
21153     } else if (Mask[i] != i + MaskSize) {
21154       // Nothing from operand 1 can change lanes.
21155       return -1;
21156     }
21157   }
21158   return EltFromOp0;
21159 }
21160 
21161 /// If a shuffle inserts exactly one element from a source vector operand into
21162 /// another vector operand and we can access the specified element as a scalar,
21163 /// then we can eliminate the shuffle.
21164 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
21165                                       SelectionDAG &DAG) {
21166   // First, check if we are taking one element of a vector and shuffling that
21167   // element into another vector.
21168   ArrayRef<int> Mask = Shuf->getMask();
21169   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
21170   SDValue Op0 = Shuf->getOperand(0);
21171   SDValue Op1 = Shuf->getOperand(1);
21172   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
21173   if (ShufOp0Index == -1) {
21174     // Commute mask and check again.
21175     ShuffleVectorSDNode::commuteMask(CommutedMask);
21176     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
21177     if (ShufOp0Index == -1)
21178       return SDValue();
21179     // Commute operands to match the commuted shuffle mask.
21180     std::swap(Op0, Op1);
21181     Mask = CommutedMask;
21182   }
21183 
21184   // The shuffle inserts exactly one element from operand 0 into operand 1.
21185   // Now see if we can access that element as a scalar via a real insert element
21186   // instruction.
21187   // TODO: We can try harder to locate the element as a scalar. Examples: it
21188   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
21189   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
21190          "Shuffle mask value must be from operand 0");
21191   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
21192     return SDValue();
21193 
21194   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
21195   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
21196     return SDValue();
21197 
21198   // There's an existing insertelement with constant insertion index, so we
21199   // don't need to check the legality/profitability of a replacement operation
21200   // that differs at most in the constant value. The target should be able to
21201   // lower any of those in a similar way. If not, legalization will expand this
21202   // to a scalar-to-vector plus shuffle.
21203   //
21204   // Note that the shuffle may move the scalar from the position that the insert
21205   // element used. Therefore, our new insert element occurs at the shuffle's
21206   // mask index value, not the insert's index value.
21207   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
21208   SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
21209   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
21210                      Op1, Op0.getOperand(1), NewInsIndex);
21211 }
21212 
21213 /// If we have a unary shuffle of a shuffle, see if it can be folded away
21214 /// completely. This has the potential to lose undef knowledge because the first
21215 /// shuffle may not have an undef mask element where the second one does. So
21216 /// only call this after doing simplifications based on demanded elements.
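/// e.g. shuf (shuf0 X, Y, <0,0,1,1>), undef, <1,0,3,2> -->
///      shuf0 X, Y, <0,0,1,1>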
21217 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
21218   // shuf (shuf0 X, Y, Mask0), undef, Mask
21219   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21220   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
21221     return SDValue();
21222 
21223   ArrayRef<int> Mask = Shuf->getMask();
21224   ArrayRef<int> Mask0 = Shuf0->getMask();
21225   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
21226     // Ignore undef elements.
21227     if (Mask[i] == -1)
21228       continue;
21229     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
21230 
21231     // Is the element of the shuffle operand chosen by this shuffle the same as
21232     // the element chosen by the shuffle operand itself?
21233     if (Mask0[Mask[i]] != Mask0[i])
21234       return SDValue();
21235   }
21236   // Every element of this shuffle is identical to the result of the previous
21237   // shuffle, so we can replace this value.
21238   return Shuf->getOperand(0);
21239 }
21240 
21241 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
21242   EVT VT = N->getValueType(0);
21243   unsigned NumElts = VT.getVectorNumElements();
21244 
21245   SDValue N0 = N->getOperand(0);
21246   SDValue N1 = N->getOperand(1);
21247 
21248   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
21249 
21250   // Canonicalize shuffle undef, undef -> undef
21251   if (N0.isUndef() && N1.isUndef())
21252     return DAG.getUNDEF(VT);
21253 
21254   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21255 
21256   // Canonicalize shuffle v, v -> v, undef
21257   if (N0 == N1) {
21258     SmallVector<int, 8> NewMask;
21259     for (unsigned i = 0; i != NumElts; ++i) {
21260       int Idx = SVN->getMaskElt(i);
21261       if (Idx >= (int)NumElts) Idx -= NumElts;
21262       NewMask.push_back(Idx);
21263     }
21264     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
21265   }
21266 
21267   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
21268   if (N0.isUndef())
21269     return DAG.getCommutedVectorShuffle(*SVN);
21270 
21271   // Remove references to rhs if it is undef
21272   if (N1.isUndef()) {
21273     bool Changed = false;
21274     SmallVector<int, 8> NewMask;
21275     for (unsigned i = 0; i != NumElts; ++i) {
21276       int Idx = SVN->getMaskElt(i);
21277       if (Idx >= (int)NumElts) {
21278         Idx = -1;
21279         Changed = true;
21280       }
21281       NewMask.push_back(Idx);
21282     }
21283     if (Changed)
21284       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
21285   }
21286 
21287   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
21288     return InsElt;
21289 
21290   // A shuffle of a single vector that is a splatted value can always be folded.
21291   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
21292     return V;
21293 
21294   if (SDValue V = formSplatFromShuffles(SVN, DAG))
21295     return V;
21296 
21297   // If it is a splat, check if the argument vector is another splat or a
21298   // build_vector.
21299   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
21300     int SplatIndex = SVN->getSplatIndex();
21301     if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
21302         TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
21303       // splat (vector_bo L, R), Index -->
21304       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
21305       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
21306       SDLoc DL(N);
21307       EVT EltVT = VT.getScalarType();
21308       SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
21309       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
21310       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
21311       SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
21312                                   N0.getNode()->getFlags());
21313       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
21314       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
21315       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
21316     }
21317 
21318     // If this is a bit convert that changes the element type of the vector but
21319     // not the number of vector elements, look through it.  Be careful not to
    // look through conversions that change things like v4f32 to v2f64.
21321     SDNode *V = N0.getNode();
21322     if (V->getOpcode() == ISD::BITCAST) {
21323       SDValue ConvInput = V->getOperand(0);
21324       if (ConvInput.getValueType().isVector() &&
21325           ConvInput.getValueType().getVectorNumElements() == NumElts)
21326         V = ConvInput.getNode();
21327     }
21328 
21329     if (V->getOpcode() == ISD::BUILD_VECTOR) {
21330       assert(V->getNumOperands() == NumElts &&
21331              "BUILD_VECTOR has wrong number of operands");
21332       SDValue Base;
21333       bool AllSame = true;
21334       for (unsigned i = 0; i != NumElts; ++i) {
21335         if (!V->getOperand(i).isUndef()) {
21336           Base = V->getOperand(i);
21337           break;
21338         }
21339       }
21340       // Splat of <u, u, u, u>, return <u, u, u, u>
21341       if (!Base.getNode())
21342         return N0;
21343       for (unsigned i = 0; i != NumElts; ++i) {
21344         if (V->getOperand(i) != Base) {
21345           AllSame = false;
21346           break;
21347         }
21348       }
21349       // Splat of <x, x, x, x>, return <x, x, x, x>
21350       if (AllSame)
21351         return N0;
21352 
21353       // Canonicalize any other splat as a build_vector.
21354       SDValue Splatted = V->getOperand(SplatIndex);
21355       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
21356       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
21357 
21358       // We may have jumped through bitcasts, so the type of the
21359       // BUILD_VECTOR may not match the type of the shuffle.
21360       if (V->getValueType(0) != VT)
21361         NewBV = DAG.getBitcast(VT, NewBV);
21362       return NewBV;
21363     }
21364   }
21365 
21366   // Simplify source operands based on shuffle mask.
21367   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21368     return SDValue(N, 0);
21369 
21370   // This is intentionally placed after demanded elements simplification because
21371   // it could eliminate knowledge of undef elements created by this shuffle.
21372   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
21373     return ShufOp;
21374 
21375   // Match shuffles that can be converted to any_vector_extend_in_reg.
21376   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
21377     return V;
21378 
21379   // Combine "truncate_vector_in_reg" style shuffles.
21380   if (SDValue V = combineTruncationShuffle(SVN, DAG))
21381     return V;
21382 
21383   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
21384       Level < AfterLegalizeVectorOps &&
21385       (N1.isUndef() ||
21386       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
21387        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
21388     if (SDValue V = partitionShuffleOfConcats(N, DAG))
21389       return V;
21390   }
21391 
21392   // A shuffle of a concat of the same narrow vector can be reduced to use
21393   // only low-half elements of a concat with undef:
21394   // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
21395   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
21396       N0.getNumOperands() == 2 &&
21397       N0.getOperand(0) == N0.getOperand(1)) {
21398     int HalfNumElts = (int)NumElts / 2;
21399     SmallVector<int, 8> NewMask;
21400     for (unsigned i = 0; i != NumElts; ++i) {
21401       int Idx = SVN->getMaskElt(i);
21402       if (Idx >= HalfNumElts) {
21403         assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
21404         Idx -= HalfNumElts;
21405       }
21406       NewMask.push_back(Idx);
21407     }
21408     if (TLI.isShuffleMaskLegal(NewMask, VT)) {
21409       SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
21410       SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
21411                                    N0.getOperand(0), UndefVec);
21412       return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
21413     }
21414   }
21415 
21416   // See if we can replace a shuffle with an insert_subvector.
21417   // e.g. v2i32 into v8i32:
21418   // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
21419   // --> insert_subvector(lhs,rhs1,4).
21420   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
21421       TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
21422     auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
21423       // Ensure RHS subvectors are legal.
21424       assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
21425       EVT SubVT = RHS.getOperand(0).getValueType();
21426       int NumSubVecs = RHS.getNumOperands();
21427       int NumSubElts = SubVT.getVectorNumElements();
21428       assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
21429       if (!TLI.isTypeLegal(SubVT))
21430         return SDValue();
21431 
      // Don't bother if we have a unary shuffle (matches undef + LHS elts).
21433       if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
21434         return SDValue();
21435 
21436       // Search [NumSubElts] spans for RHS sequence.
21437       // TODO: Can we avoid nested loops to increase performance?
21438       SmallVector<int> InsertionMask(NumElts);
21439       for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
21440         for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
21441           // Reset mask to identity.
21442           std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
21443 
21444           // Add subvector insertion.
21445           std::iota(InsertionMask.begin() + SubIdx,
21446                     InsertionMask.begin() + SubIdx + NumSubElts,
21447                     NumElts + (SubVec * NumSubElts));
21448 
21449           // See if the shuffle mask matches the reference insertion mask.
21450           bool MatchingShuffle = true;
21451           for (int i = 0; i != (int)NumElts; ++i) {
21452             int ExpectIdx = InsertionMask[i];
21453             int ActualIdx = Mask[i];
21454             if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
21455               MatchingShuffle = false;
21456               break;
21457             }
21458           }
21459 
21460           if (MatchingShuffle)
21461             return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
21462                                RHS.getOperand(SubVec),
21463                                DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
21464         }
21465       }
21466       return SDValue();
21467     };
21468     ArrayRef<int> Mask = SVN->getMask();
21469     if (N1.getOpcode() == ISD::CONCAT_VECTORS)
21470       if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
21471         return InsertN1;
21472     if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
21473       SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
21474       ShuffleVectorSDNode::commuteMask(CommuteMask);
21475       if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
21476         return InsertN0;
21477     }
21478   }
21479 
21480   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21481   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21482   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
21483     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
21484       return Res;
21485 
21486   // If this shuffle only has a single input that is a bitcasted shuffle,
21487   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
21488   // back to their original types.
21489   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
21490       N1.isUndef() && Level < AfterLegalizeVectorOps &&
21491       TLI.isTypeLegal(VT)) {
21492 
21493     SDValue BC0 = peekThroughOneUseBitcasts(N0);
21494     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
21495       EVT SVT = VT.getScalarType();
21496       EVT InnerVT = BC0->getValueType(0);
21497       EVT InnerSVT = InnerVT.getScalarType();
21498 
21499       // Determine which shuffle works with the smaller scalar type.
21500       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
21501       EVT ScaleSVT = ScaleVT.getScalarType();
21502 
21503       if (TLI.isTypeLegal(ScaleVT) &&
21504           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
21505           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
21506         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21507         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21508 
21509         // Scale the shuffle masks to the smaller scalar type.
21510         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
21511         SmallVector<int, 8> InnerMask;
21512         SmallVector<int, 8> OuterMask;
21513         narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
21514         narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
21515 
21516         // Merge the shuffle masks.
21517         SmallVector<int, 8> NewMask;
21518         for (int M : OuterMask)
21519           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
21520 
21521         // Test for shuffle mask legality over both commutations.
21522         SDValue SV0 = BC0->getOperand(0);
21523         SDValue SV1 = BC0->getOperand(1);
21524         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21525         if (!LegalMask) {
21526           std::swap(SV0, SV1);
21527           ShuffleVectorSDNode::commuteMask(NewMask);
21528           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21529         }
21530 
21531         if (LegalMask) {
21532           SV0 = DAG.getBitcast(ScaleVT, SV0);
21533           SV1 = DAG.getBitcast(ScaleVT, SV1);
21534           return DAG.getBitcast(
21535               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
21536         }
21537       }
21538     }
21539   }
21540 
21541   // Compute the combined shuffle mask for a shuffle with SV0 as the first
21542   // operand, and SV1 as the second operand.
21543   // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
21544   //      Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
21545   auto MergeInnerShuffle =
21546       [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
21547                      ShuffleVectorSDNode *OtherSVN, SDValue N1,
21548                      const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
21549                      SmallVectorImpl<int> &Mask) -> bool {
21550     // Don't try to fold splats; they're likely to simplify somehow, or they
21551     // might be free.
21552     if (OtherSVN->isSplat())
21553       return false;
21554 
21555     SV0 = SV1 = SDValue();
21556     Mask.clear();
21557 
21558     for (unsigned i = 0; i != NumElts; ++i) {
21559       int Idx = SVN->getMaskElt(i);
21560       if (Idx < 0) {
21561         // Propagate Undef.
21562         Mask.push_back(Idx);
21563         continue;
21564       }
21565 
21566       if (Commute)
21567         Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
21568 
21569       SDValue CurrentVec;
21570       if (Idx < (int)NumElts) {
21571         // This shuffle index refers to the inner shuffle N0. Lookup the inner
21572         // shuffle mask to identify which vector is actually referenced.
21573         Idx = OtherSVN->getMaskElt(Idx);
21574         if (Idx < 0) {
21575           // Propagate Undef.
21576           Mask.push_back(Idx);
21577           continue;
21578         }
21579         CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
21580                                           : OtherSVN->getOperand(1);
21581       } else {
21582         // This shuffle index references an element within N1.
21583         CurrentVec = N1;
21584       }
21585 
21586       // Simple case where 'CurrentVec' is UNDEF.
21587       if (CurrentVec.isUndef()) {
21588         Mask.push_back(-1);
21589         continue;
21590       }
21591 
21592       // Canonicalize the shuffle index. We don't know yet if CurrentVec
21593       // will be the first or second operand of the combined shuffle.
21594       Idx = Idx % NumElts;
21595       if (!SV0.getNode() || SV0 == CurrentVec) {
21596         // Ok. CurrentVec is the left hand side.
21597         // Update the mask accordingly.
21598         SV0 = CurrentVec;
21599         Mask.push_back(Idx);
21600         continue;
21601       }
21602       if (!SV1.getNode() || SV1 == CurrentVec) {
21603         // Ok. CurrentVec is the right hand side.
21604         // Update the mask accordingly.
21605         SV1 = CurrentVec;
21606         Mask.push_back(Idx + NumElts);
21607         continue;
21608       }
21609 
21610       // Last chance - see if the vector is another shuffle and if it
21611       // uses one of the existing candidate shuffle ops.
21612       if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
21613         int InnerIdx = CurrentSVN->getMaskElt(Idx);
21614         if (InnerIdx < 0) {
21615           Mask.push_back(-1);
21616           continue;
21617         }
21618         SDValue InnerVec = (InnerIdx < (int)NumElts)
21619                                ? CurrentSVN->getOperand(0)
21620                                : CurrentSVN->getOperand(1);
21621         if (InnerVec.isUndef()) {
21622           Mask.push_back(-1);
21623           continue;
21624         }
21625         InnerIdx %= NumElts;
21626         if (InnerVec == SV0) {
21627           Mask.push_back(InnerIdx);
21628           continue;
21629         }
21630         if (InnerVec == SV1) {
21631           Mask.push_back(InnerIdx + NumElts);
21632           continue;
21633         }
21634       }
21635 
21636       // Bail out if we cannot convert the shuffle pair into a single shuffle.
21637       return false;
21638     }
21639 
21640     if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21641       return true;
21642 
    // Avoid introducing shuffles with an illegal mask.
21644     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21645     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21646     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21647     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
21648     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
21649     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
21650     if (TLI.isShuffleMaskLegal(Mask, VT))
21651       return true;
21652 
21653     std::swap(SV0, SV1);
21654     ShuffleVectorSDNode::commuteMask(Mask);
21655     return TLI.isShuffleMaskLegal(Mask, VT);
21656   };
21657 
21658   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
21659     // Canonicalize shuffles according to rules:
21660     //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
21661     //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
21662     //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
21663     if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21664         N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
21665       // The incoming shuffle must be of the same type as the result of the
21666       // current shuffle.
21667       assert(N1->getOperand(0).getValueType() == VT &&
21668              "Shuffle types don't match");
21669 
21670       SDValue SV0 = N1->getOperand(0);
21671       SDValue SV1 = N1->getOperand(1);
21672       bool HasSameOp0 = N0 == SV0;
21673       bool IsSV1Undef = SV1.isUndef();
21674       if (HasSameOp0 || IsSV1Undef || N0 == SV1)
21675         // Commute the operands of this shuffle so merging below will trigger.
21676         return DAG.getCommutedVectorShuffle(*SVN);
21677     }
21678 
21679     // Canonicalize splat shuffles to the RHS to improve merging below.
21680     //  shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
21681     if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
21682         N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21683         cast<ShuffleVectorSDNode>(N0)->isSplat() &&
21684         !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
21685       return DAG.getCommutedVectorShuffle(*SVN);
21686     }
21687 
21688     // Try to fold according to rules:
21689     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21690     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21691     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21692     // Don't try to fold shuffles with illegal type.
21693     // Only fold if this shuffle is the only user of the other shuffle.
    // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
21695     for (int i = 0; i != 2; ++i) {
21696       if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
21697           N->isOnlyUserOf(N->getOperand(i).getNode())) {
21698         // The incoming shuffle must be of the same type as the result of the
21699         // current shuffle.
21700         auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
21701         assert(OtherSV->getOperand(0).getValueType() == VT &&
21702                "Shuffle types don't match");
21703 
21704         SDValue SV0, SV1;
21705         SmallVector<int, 4> Mask;
21706         if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
21707                               SV0, SV1, Mask)) {
          // Check if all indices in Mask are Undef. If so, propagate Undef.
21709           if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21710             return DAG.getUNDEF(VT);
21711 
21712           return DAG.getVectorShuffle(VT, SDLoc(N),
21713                                       SV0 ? SV0 : DAG.getUNDEF(VT),
21714                                       SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
21715         }
21716       }
21717     }
21718 
    // Merge shuffles through binops if we are able to merge them with at
    // least one other shuffle.
21721     // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
21722     // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
21723     unsigned SrcOpcode = N0.getOpcode();
21724     if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
21725         (N1.isUndef() ||
21726          (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
21727       // Get binop source ops, or just pass on the undef.
21728       SDValue Op00 = N0.getOperand(0);
21729       SDValue Op01 = N0.getOperand(1);
21730       SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
21731       SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
21732       // TODO: We might be able to relax the VT check but we don't currently
21733       // have any isBinOp() that has different result/ops VTs so play safe until
21734       // we have test coverage.
21735       if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
21736           Op01.getValueType() == VT && Op11.getValueType() == VT &&
21737           (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
21738            Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
21739            Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
21740            Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
21741         auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
21742                                         SmallVectorImpl<int> &Mask, bool LeftOp,
21743                                         bool Commute) {
21744           SDValue InnerN = Commute ? N1 : N0;
21745           SDValue Op0 = LeftOp ? Op00 : Op01;
21746           SDValue Op1 = LeftOp ? Op10 : Op11;
21747           if (Commute)
21748             std::swap(Op0, Op1);
21749           // Only accept the merged shuffle if we don't introduce undef elements,
21750           // or the inner shuffle already contained undef elements.
21751           auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
21752           return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
21753                  MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
21754                                    Mask) &&
21755                  (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
21756                   llvm::none_of(Mask, [](int M) { return M < 0; }));
21757         };
21758 
21759         // Ensure we don't increase the number of shuffles - we must merge a
21760         // shuffle from at least one of the LHS and RHS ops.
21761         bool MergedLeft = false;
21762         SDValue LeftSV0, LeftSV1;
21763         SmallVector<int, 4> LeftMask;
21764         if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
21765             CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
21766           MergedLeft = true;
21767         } else {
21768           LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21769           LeftSV0 = Op00, LeftSV1 = Op10;
21770         }
21771 
21772         bool MergedRight = false;
21773         SDValue RightSV0, RightSV1;
21774         SmallVector<int, 4> RightMask;
21775         if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
21776             CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
21777           MergedRight = true;
21778         } else {
21779           RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21780           RightSV0 = Op01, RightSV1 = Op11;
21781         }
21782 
21783         if (MergedLeft || MergedRight) {
21784           SDLoc DL(N);
21785           SDValue LHS = DAG.getVectorShuffle(
21786               VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
21787               LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
21788           SDValue RHS = DAG.getVectorShuffle(
21789               VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
21790               RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
21791           return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
21792         }
21793       }
21794     }
21795   }
21796 
21797   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
21798     return V;
21799 
21800   return SDValue();
21801 }
21802 
21803 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
21804   SDValue InVal = N->getOperand(0);
21805   EVT VT = N->getValueType(0);
21806 
21807   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
21808   // with a VECTOR_SHUFFLE and possible truncate.
21809   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21810       VT.isFixedLengthVector() &&
21811       InVal->getOperand(0).getValueType().isFixedLengthVector()) {
21812     SDValue InVec = InVal->getOperand(0);
21813     SDValue EltNo = InVal->getOperand(1);
21814     auto InVecT = InVec.getValueType();
21815     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
21816       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
21817       int Elt = C0->getZExtValue();
21818       NewMask[0] = Elt;
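      // The new mask selects the extracted element into lane 0; all other
      // lanes remain undef (-1).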
      // If we have an implicit truncate, do the truncate here as long as it's
      // legal; if it's not legal, the fold below won't apply either because
      // the scalar types won't match.
21821       if (VT.getScalarType() != InVal.getValueType() &&
21822           InVal.getValueType().isScalarInteger() &&
21823           isTypeLegal(VT.getScalarType())) {
21824         SDValue Val =
21825             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
21826         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
21827       }
21828       if (VT.getScalarType() == InVecT.getScalarType() &&
21829           VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
21830         SDValue LegalShuffle =
21831           TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
21832                                       DAG.getUNDEF(InVecT), NewMask, DAG);
21833         if (LegalShuffle) {
          // If the initial vector is the correct size, this shuffle is a
          // valid result.
21836           if (VT == InVecT)
21837             return LegalShuffle;
21838           // If not we must truncate the vector.
21839           if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
21840             SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
21841             EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
21842                                          InVecT.getVectorElementType(),
21843                                          VT.getVectorNumElements());
21844             return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
21845                                LegalShuffle, ZeroIdx);
21846           }
21847         }
21848       }
21849     }
21850   }
21851 
21852   return SDValue();
21853 }
21854 
21855 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
21856   EVT VT = N->getValueType(0);
21857   SDValue N0 = N->getOperand(0);
21858   SDValue N1 = N->getOperand(1);
21859   SDValue N2 = N->getOperand(2);
21860   uint64_t InsIdx = N->getConstantOperandVal(2);
21861 
21862   // If inserting an UNDEF, just return the original vector.
21863   if (N1.isUndef())
21864     return N0;
21865 
21866   // If this is an insert of an extracted vector into an undef vector, we can
21867   // just use the input to the extract.
21868   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21869       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
21870     return N1.getOperand(0);
21871 
21872   // If we are inserting a bitcast value into an undef, with the same
21873   // number of elements, just use the bitcast input of the extract.
21874   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
21875   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
21876   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
21877       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21878       N1.getOperand(0).getOperand(1) == N2 &&
21879       N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
21880           VT.getVectorElementCount() &&
21881       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
21882           VT.getSizeInBits()) {
21883     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
21884   }
21885 
  // If both N0 and N1 are bitcast values on which insert_subvector
  // would make sense, pull the bitcast through.
21888   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
21889   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
21890   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
21891     SDValue CN0 = N0.getOperand(0);
21892     SDValue CN1 = N1.getOperand(0);
21893     EVT CN0VT = CN0.getValueType();
21894     EVT CN1VT = CN1.getValueType();
21895     if (CN0VT.isVector() && CN1VT.isVector() &&
21896         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
21897         CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
21898       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
21899                                       CN0.getValueType(), CN0, CN1, N2);
21900       return DAG.getBitcast(VT, NewINSERT);
21901     }
21902   }
21903 
21904   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
21905   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
21906   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
21907   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
21908       N0.getOperand(1).getValueType() == N1.getValueType() &&
21909       N0.getOperand(2) == N2)
21910     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
21911                        N1, N2);
21912 
21913   // Eliminate an intermediate insert into an undef vector:
21914   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
21915   // insert_subvector undef, X, N2
21916   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
21917       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
21918     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
21919                        N1.getOperand(1), N2);
21920 
21921   // Push subvector bitcasts to the output, adjusting the index as we go.
21922   // insert_subvector(bitcast(v), bitcast(s), c1)
21923   // -> bitcast(insert_subvector(v, s, c2))
21924   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
21925       N1.getOpcode() == ISD::BITCAST) {
21926     SDValue N0Src = peekThroughBitcasts(N0);
21927     SDValue N1Src = peekThroughBitcasts(N1);
21928     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
21929     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
21930     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
21931         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
21932       EVT NewVT;
21933       SDLoc DL(N);
21934       SDValue NewIdx;
21935       LLVMContext &Ctx = *DAG.getContext();
21936       ElementCount NumElts = VT.getVectorElementCount();
21937       unsigned EltSizeInBits = VT.getScalarSizeInBits();
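      // Rescale the insert index to the new element size. For example
      // (illustrative types only): inserting at index 1 of a v4i64
      // corresponds to index 2 of its v8i32 view when the subvector source
      // has i32 elements.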
21938       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
21939         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
21940         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
21941         NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
21942       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
21943         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
21944         if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
21945           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
21946                                    NumElts.divideCoefficientBy(Scale));
21947           NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
21948         }
21949       }
21950       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
21951         SDValue Res = DAG.getBitcast(NewVT, N0Src);
21952         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
21953         return DAG.getBitcast(VT, Res);
21954       }
21955     }
21956   }
21957 
  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, SubA, Idx0), SubB, Idx1)
  // -> (insert_subvector (insert_subvector A, SubB, Idx1), SubA, Idx0)
  // when Idx1 < Idx0, so the insert with the smaller index ends up innermost.
21962   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
21963       N1.getValueType() == N0.getOperand(1).getValueType()) {
21964     unsigned OtherIdx = N0.getConstantOperandVal(2);
21965     if (InsIdx < OtherIdx) {
21966       // Swap nodes.
21967       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
21968                                   N0.getOperand(0), N1, N2);
21969       AddToWorklist(NewOp.getNode());
21970       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
21971                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
21972     }
21973   }
21974 
21975   // If the input vector is a concatenation, and the insert replaces
21976   // one of the pieces, we can optimize into a single concat_vectors.
21977   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
21978       N0.getOperand(0).getValueType() == N1.getValueType() &&
21979       N0.getOperand(0).getValueType().isScalableVector() ==
21980           N1.getValueType().isScalableVector()) {
21981     unsigned Factor = N1.getValueType().getVectorMinNumElements();
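    // Each concat operand spans Factor elements, so the piece being replaced
    // is at operand index InsIdx / Factor.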
21982     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
21983     Ops[InsIdx / Factor] = N1;
21984     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21985   }
21986 
21987   // Simplify source operands based on insertion.
21988   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21989     return SDValue(N, 0);
21990 
21991   return SDValue();
21992 }
21993 
21994 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
21995   SDValue N0 = N->getOperand(0);
21996 
21997   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
21998   if (N0->getOpcode() == ISD::FP16_TO_FP)
21999     return N0->getOperand(0);
22000 
22001   return SDValue();
22002 }
22003 
22004 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
22005   SDValue N0 = N->getOperand(0);
22006 
22007   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
22008   if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
22009     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
22010     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
22011       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
22012                          N0.getOperand(0));
22013     }
22014   }
22015 
22016   return SDValue();
22017 }
22018 
22019 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
22020   SDValue N0 = N->getOperand(0);
22021   EVT VT = N0.getValueType();
22022   unsigned Opcode = N->getOpcode();
22023 
  // VECREDUCE over a 1-element vector is just an extract.
22025   if (VT.getVectorElementCount().isScalar()) {
22026     SDLoc dl(N);
22027     SDValue Res =
22028         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
22029                     DAG.getVectorIdxConstant(0, dl));
22030     if (Res.getValueType() != N->getValueType(0))
22031       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
22032     return Res;
22033   }
22034 
  // On a boolean vector an and/or reduction is the same as a umin/umax
  // reduction. Convert them if the latter is legal while the former isn't.
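  // e.g. with elements known to be all-sign-bits (each 0 or -1):
  // umin(0, -1) == 0 == (0 & -1) and umax(0, -1) == -1 == (0 | -1).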
22037   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
22038     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
22039         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
22040     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
22041         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
22042         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
22043       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
22044   }
22045 
22046   return SDValue();
22047 }
22048 
22049 SDValue DAGCombiner::visitVPOp(SDNode *N) {
22050   // VP operations in which all vector elements are disabled - either by
22051   // determining that the mask is all false or that the EVL is 0 - can be
22052   // eliminated.
22053   bool AreAllEltsDisabled = false;
22054   if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
22055     AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
22056   if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
22057     AreAllEltsDisabled |=
22058         ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
22059 
22060   // This is the only generic VP combine we support for now.
22061   if (!AreAllEltsDisabled)
22062     return SDValue();
22063 
22064   // Binary operations can be replaced by UNDEF.
22065   if (ISD::isVPBinaryOp(N->getOpcode()))
22066     return DAG.getUNDEF(N->getValueType(0));
22067 
22068   // VP Memory operations can be replaced by either the chain (stores) or the
22069   // chain + undef (loads).
22070   if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
22071     if (MemSD->writeMem())
22072       return MemSD->getChain();
22073     return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
22074   }
22075 
22076   // Reduction operations return the start operand when no elements are active.
22077   if (ISD::isVPReduction(N->getOpcode()))
22078     return N->getOperand(0);
22079 
22080   return SDValue();
22081 }
22082 
/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
22087 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
22088   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
22089 
22090   EVT VT = N->getValueType(0);
22091   SDValue LHS = N->getOperand(0);
22092   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
22093   SDLoc DL(N);
22094 
22095   // Make sure we're not running after operation legalization where it
22096   // may have custom lowered the vector shuffles.
22097   if (LegalOperations)
22098     return SDValue();
22099 
22100   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
22101     return SDValue();
22102 
22103   EVT RVT = RHS.getValueType();
22104   unsigned NumElts = RHS.getNumOperands();
22105 
  // Attempt to create a valid clear mask, splitting the mask into
  // sub-elements and checking to see if each is all zeros or all ones -
  // suitable for shuffle masking.
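  // e.g. a v2i64 element of 0x00000000FFFFFFFF is neither all zeros nor all
  // ones, but it splits cleanly into an all-ones and an all-zeros i32
  // sub-element.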
22109   auto BuildClearMask = [&](int Split) {
22110     int NumSubElts = NumElts * Split;
22111     int NumSubBits = RVT.getScalarSizeInBits() / Split;
22112 
22113     SmallVector<int, 8> Indices;
22114     for (int i = 0; i != NumSubElts; ++i) {
22115       int EltIdx = i / Split;
22116       int SubIdx = i % Split;
22117       SDValue Elt = RHS.getOperand(EltIdx);
22118       // X & undef --> 0 (not undef). So this lane must be converted to choose
22119       // from the zero constant vector (same as if the element had all 0-bits).
22120       if (Elt.isUndef()) {
22121         Indices.push_back(i + NumSubElts);
22122         continue;
22123       }
22124 
22125       APInt Bits;
22126       if (isa<ConstantSDNode>(Elt))
22127         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
22128       else if (isa<ConstantFPSDNode>(Elt))
22129         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
22130       else
22131         return SDValue();
22132 
22133       // Extract the sub element from the constant bit mask.
22134       if (DAG.getDataLayout().isBigEndian())
22135         Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
22136       else
22137         Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
22138 
22139       if (Bits.isAllOnes())
22140         Indices.push_back(i);
22141       else if (Bits == 0)
22142         Indices.push_back(i + NumSubElts);
22143       else
22144         return SDValue();
22145     }
22146 
22147     // Let's see if the target supports this vector_shuffle.
22148     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
22149     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
22150     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
22151       return SDValue();
22152 
22153     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
22154     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
22155                                                    DAG.getBitcast(ClearVT, LHS),
22156                                                    Zero, Indices));
22157   };
22158 
22159   // Determine maximum split level (byte level masking).
22160   int MaxSplit = 1;
22161   if (RVT.getScalarSizeInBits() % 8 == 0)
22162     MaxSplit = RVT.getScalarSizeInBits() / 8;
22163 
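  // Try increasingly fine splits, from whole elements (Split == 1) down to
  // byte-sized sub-elements.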
22164   for (int Split = 1; Split <= MaxSplit; ++Split)
22165     if (RVT.getScalarSizeInBits() % Split == 0)
22166       if (SDValue S = BuildClearMask(Split))
22167         return S;
22168 
22169   return SDValue();
22170 }
22171 
22172 /// If a vector binop is performed on splat values, it may be profitable to
22173 /// extract, scalarize, and insert/splat.
22174 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
22175   SDValue N0 = N->getOperand(0);
22176   SDValue N1 = N->getOperand(1);
22177   unsigned Opcode = N->getOpcode();
22178   EVT VT = N->getValueType(0);
22179   EVT EltVT = VT.getVectorElementType();
22180   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22181 
22182   // TODO: Remove/replace the extract cost check? If the elements are available
22183   //       as scalars, then there may be no extract cost. Should we ask if
22184   //       inserting a scalar back into a vector is cheap instead?
22185   int Index0, Index1;
22186   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
22187   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
22188   if (!Src0 || !Src1 || Index0 != Index1 ||
22189       Src0.getValueType().getVectorElementType() != EltVT ||
22190       Src1.getValueType().getVectorElementType() != EltVT ||
22191       !TLI.isExtractVecEltCheap(VT, Index0) ||
22192       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
22193     return SDValue();
22194 
22195   SDLoc DL(N);
22196   SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
22197   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
22198   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
22199   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
22200 
22201   // If all lanes but 1 are undefined, no need to splat the scalar result.
22202   // TODO: Keep track of undefs and use that info in the general case.
22203   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
22204       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
22205       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
22206     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
22207     // build_vec ..undef, (bo X, Y), undef...
22208     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
22209     Ops[Index0] = ScalarBO;
22210     return DAG.getBuildVector(VT, DL, Ops);
22211   }
22212 
22213   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
22214   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
22215   return DAG.getBuildVector(VT, DL, Ops);
22216 }
22217 
22218 /// Visit a binary vector operation, like ADD.
22219 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
22220   assert(N->getValueType(0).isVector() &&
22221          "SimplifyVBinOp only works on vectors!");
22222 
22223   SDValue LHS = N->getOperand(0);
22224   SDValue RHS = N->getOperand(1);
22225   SDValue Ops[] = {LHS, RHS};
22226   EVT VT = N->getValueType(0);
22227   unsigned Opcode = N->getOpcode();
22228   SDNodeFlags Flags = N->getFlags();
22229 
22230   // See if we can constant fold the vector operation.
22231   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
22232           Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
22233     return Fold;
22234 
22235   // Move unary shuffles with identical masks after a vector binop:
22236   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
22237   //   --> shuffle (VBinOp A, B), Undef, Mask
22238   // This does not require type legality checks because we are creating the
22239   // same types of operations that are in the original sequence. We do have to
  // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
22241   // though. This code is adapted from the identical transform in instcombine.
22242   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
22243       Opcode != ISD::UREM && Opcode != ISD::SREM &&
22244       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
22245     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
22246     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
22247     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
22248         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
22249         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
22250       SDLoc DL(N);
22251       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
22252                                      RHS.getOperand(0), Flags);
22253       SDValue UndefV = LHS.getOperand(1);
22254       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
22255     }
22256 
22257     // Try to sink a splat shuffle after a binop with a uniform constant.
22258     // This is limited to cases where neither the shuffle nor the constant have
22259     // undefined elements because that could be poison-unsafe or inhibit
22260     // demanded elements analysis. It is further limited to not change a splat
22261     // of an inserted scalar because that may be optimized better by
22262     // load-folding or other target-specific behaviors.
22263     if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
22264         Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
22265         Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22266       // binop (splat X), (splat C) --> splat (binop X, C)
22267       SDLoc DL(N);
22268       SDValue X = Shuf0->getOperand(0);
22269       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
22270       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22271                                   Shuf0->getMask());
22272     }
22273     if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
22274         Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
22275         Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22276       // binop (splat C), (splat X) --> splat (binop C, X)
22277       SDLoc DL(N);
22278       SDValue X = Shuf1->getOperand(0);
22279       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
22280       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22281                                   Shuf1->getMask());
22282     }
22283   }
22284 
22285   // The following pattern is likely to emerge with vector reduction ops. Moving
22286   // the binary operation ahead of insertion may allow using a narrower vector
22287   // instruction that has better performance than the wide version of the op:
22288   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
22289   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
22290       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
22291       LHS.getOperand(2) == RHS.getOperand(2) &&
22292       (LHS.hasOneUse() || RHS.hasOneUse())) {
22293     SDValue X = LHS.getOperand(1);
22294     SDValue Y = RHS.getOperand(1);
22295     SDValue Z = LHS.getOperand(2);
22296     EVT NarrowVT = X.getValueType();
22297     if (NarrowVT == Y.getValueType() &&
22298         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
22299                                               LegalOperations)) {
22300       // (binop undef, undef) may not return undef, so compute that result.
22301       SDLoc DL(N);
22302       SDValue VecC =
22303           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
22304       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
22305       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
22306     }
22307   }
22308 
22309   // Make sure all but the first op are undef or constant.
22310   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
22311     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
22312            all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
22313              return Op.isUndef() ||
22314                     ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
22315            });
22316   };
22317 
22318   // The following pattern is likely to emerge with vector reduction ops. Moving
22319   // the binary operation ahead of the concat may allow using a narrower vector
22320   // instruction that has better performance than the wide version of the op:
22321   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
22322   //   concat (VBinOp X, Y), VecC
22323   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
22324       (LHS.hasOneUse() || RHS.hasOneUse())) {
22325     EVT NarrowVT = LHS.getOperand(0).getValueType();
22326     if (NarrowVT == RHS.getOperand(0).getValueType() &&
22327         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
22328       SDLoc DL(N);
22329       unsigned NumOperands = LHS.getNumOperands();
22330       SmallVector<SDValue, 4> ConcatOps;
22331       for (unsigned i = 0; i != NumOperands; ++i) {
        // These will constant fold for operands 1 and up.
22333         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
22334                                         RHS.getOperand(i)));
22335       }
22336 
22337       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22338     }
22339   }
22340 
22341   if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
22342     return V;
22343 
22344   return SDValue();
22345 }
22346 
22347 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
22348                                     SDValue N2) {
  assert(N0.getOpcode() == ISD::SETCC &&
         "First argument must be a SetCC node!");
22350 
22351   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
22352                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
22353 
22354   // If we got a simplified select_cc node back from SimplifySelectCC, then
22355   // break it down into a new SETCC node, and a new SELECT node, and then return
22356   // the SELECT node, since we were called with a SELECT node.
22357   if (SCC.getNode()) {
22358     // Check to see if we got a select_cc back (to turn into setcc/select).
22359     // Otherwise, just return whatever node we got back, like fabs.
22360     if (SCC.getOpcode() == ISD::SELECT_CC) {
22361       const SDNodeFlags Flags = N0.getNode()->getFlags();
22362       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
22363                                   N0.getValueType(),
22364                                   SCC.getOperand(0), SCC.getOperand(1),
22365                                   SCC.getOperand(4), Flags);
22366       AddToWorklist(SETCC.getNode());
22367       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
22368                                          SCC.getOperand(2), SCC.getOperand(3));
22369       SelectNode->setFlags(Flags);
22370       return SelectNode;
22371     }
22372 
22373     return SCC;
22374   }
22375   return SDValue();
22376 }
22377 
22378 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
22379 /// being selected between, see if we can simplify the select.  Callers of this
22380 /// should assume that TheSelect is deleted if this returns true.  As such, they
22381 /// should return the appropriate thing (e.g. the node) back to the top-level of
22382 /// the DAG combiner loop to avoid it being looked at.
22383 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
22384                                     SDValue RHS) {
22385   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22386   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
22387   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
22388     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
22389       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
22390       SDValue Sqrt = RHS;
22391       ISD::CondCode CC;
22392       SDValue CmpLHS;
22393       const ConstantFPSDNode *Zero = nullptr;
22394 
22395       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
22396         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
22397         CmpLHS = TheSelect->getOperand(0);
22398         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
22399       } else {
22400         // SELECT or VSELECT
22401         SDValue Cmp = TheSelect->getOperand(0);
22402         if (Cmp.getOpcode() == ISD::SETCC) {
22403           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
22404           CmpLHS = Cmp.getOperand(0);
22405           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
22406         }
22407       }
22408       if (Zero && Zero->isZero() &&
22409           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
22410           CC == ISD::SETULT || CC == ISD::SETLT)) {
22411         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22412         CombineTo(TheSelect, Sqrt);
22413         return true;
22414       }
22415     }
22416   }
  // Cannot simplify a select with a vector condition.
22418   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
22419 
22420   // If this is a select from two identical things, try to pull the operation
22421   // through the select.
22422   if (LHS.getOpcode() != RHS.getOpcode() ||
22423       !LHS.hasOneUse() || !RHS.hasOneUse())
22424     return false;
22425 
22426   // If this is a load and the token chain is identical, replace the select
22427   // of two loads with a load through a select of the address to load from.
22428   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
22429   // constants have been dropped into the constant pool.
22430   if (LHS.getOpcode() == ISD::LOAD) {
22431     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
22432     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
22433 
22434     // Token chains must be identical.
22435     if (LHS.getOperand(0) != RHS.getOperand(0) ||
22436         // Do not let this transformation reduce the number of volatile loads.
22437         // Be conservative for atomics for the moment
22438         // TODO: This does appear to be legal for unordered atomics (see D66309)
22439         !LLD->isSimple() || !RLD->isSimple() ||
22440         // FIXME: If either is a pre/post inc/dec load,
22441         // we'd need to split out the address adjustment.
22442         LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VTs must match.
22444         LLD->getMemoryVT() != RLD->getMemoryVT() ||
22445         // If this is an EXTLOAD, the kind of extension must match.
22446         (LLD->getExtensionType() != RLD->getExtensionType() &&
22447          // The only exception is if one of the extensions is anyext.
22448          LLD->getExtensionType() != ISD::EXTLOAD &&
22449          RLD->getExtensionType() != ISD::EXTLOAD) ||
22450         // FIXME: this discards src value information.  This is
22451         // over-conservative. It would be beneficial to be able to remember
22452         // both potential memory locations.  Since we are discarding
22453         // src value info, don't do the transformation if the memory
22454         // locations are not in the default address space.
22455         LLD->getPointerInfo().getAddrSpace() != 0 ||
22456         RLD->getPointerInfo().getAddrSpace() != 0 ||
22457         // We can't produce a CMOV of a TargetFrameIndex since we won't
22458         // generate the address generation required.
22459         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22460         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22461         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
22462                                       LLD->getBasePtr().getValueType()))
22463       return false;
22464 
22465     // The loads must not depend on one another.
22466     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
22467       return false;
22468 
22469     // Check that the select condition doesn't reach either load.  If so,
22470     // folding this will induce a cycle into the DAG.  If not, this is safe to
22471     // xform, so create a select of the addresses.
22472 
22473     SmallPtrSet<const SDNode *, 32> Visited;
22474     SmallVector<const SDNode *, 16> Worklist;
22475 
22476     // Always fail if LLD and RLD are not independent. TheSelect is a
22477     // predecessor to all Nodes in question so we need not search past it.
22478 
22479     Visited.insert(TheSelect);
22480     Worklist.push_back(LLD);
22481     Worklist.push_back(RLD);
22482 
22483     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
22484         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
22485       return false;
22486 
22487     SDValue Addr;
22488     if (TheSelect->getOpcode() == ISD::SELECT) {
22489       // We cannot do this optimization if any pair of {RLD, LLD} is a
22490       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
22491       // Loads, we only need to check if CondNode is a successor to one of the
22492       // loads. We can further avoid this if there's no use of their chain
22493       // value.
22494       SDNode *CondNode = TheSelect->getOperand(0).getNode();
22495       Worklist.push_back(CondNode);
22496 
22497       if ((LLD->hasAnyUseOfValue(1) &&
22498            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22499           (RLD->hasAnyUseOfValue(1) &&
22500            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22501         return false;
22502 
22503       Addr = DAG.getSelect(SDLoc(TheSelect),
22504                            LLD->getBasePtr().getValueType(),
22505                            TheSelect->getOperand(0), LLD->getBasePtr(),
22506                            RLD->getBasePtr());
22507     } else {  // Otherwise SELECT_CC
22508       // We cannot do this optimization if any pair of {RLD, LLD} is a
22509       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
22510       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
22511       // one of the loads. We can further avoid this if there's no use of their
22512       // chain value.
22513 
22514       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
22515       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
22516       Worklist.push_back(CondLHS);
22517       Worklist.push_back(CondRHS);
22518 
22519       if ((LLD->hasAnyUseOfValue(1) &&
22520            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22521           (RLD->hasAnyUseOfValue(1) &&
22522            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22523         return false;
22524 
22525       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
22526                          LLD->getBasePtr().getValueType(),
22527                          TheSelect->getOperand(0),
22528                          TheSelect->getOperand(1),
22529                          LLD->getBasePtr(), RLD->getBasePtr(),
22530                          TheSelect->getOperand(4));
22531     }
22532 
22533     SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must use the minimum (most restrictive) alignment of
    // the inputs.
22537     Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
22538     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
22539     if (!RLD->isInvariant())
22540       MMOFlags &= ~MachineMemOperand::MOInvariant;
22541     if (!RLD->isDereferenceable())
22542       MMOFlags &= ~MachineMemOperand::MODereferenceable;
22543     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
22544       // FIXME: Discards pointer and AA info.
22545       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
22546                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
22547                          MMOFlags);
22548     } else {
22549       // FIXME: Discards pointer and AA info.
22550       Load = DAG.getExtLoad(
22551           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
22552                                                   : LLD->getExtensionType(),
22553           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
22554           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
22555     }
22556 
22557     // Users of the select now use the result of the load.
22558     CombineTo(TheSelect, Load);
22559 
22560     // Users of the old loads now use the new load's chain.  We know the
22561     // old-load value is dead now.
22562     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
22563     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
22564     return true;
22565   }
22566 
22567   return false;
22568 }
22569 
22570 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
22571 /// bitwise 'and'.
22572 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
22573                                             SDValue N1, SDValue N2, SDValue N3,
22574                                             ISD::CondCode CC) {
22575   // If this is a select where the false operand is zero and the compare is a
22576   // check of the sign bit, see if we can perform the "gzip trick":
22577   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
22578   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
22579   EVT XType = N0.getValueType();
22580   EVT AType = N2.getValueType();
22581   if (!isNullConstant(N3) || !XType.bitsGE(AType))
22582     return SDValue();
22583 
22584   // If the comparison is testing for a positive value, we have to invert
22585   // the sign bit mask, so only do that transform if the target has a bitwise
22586   // 'and not' instruction (the invert is free).
22587   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
22588     // (X > -1) ? A : 0
22589     // (X >  0) ? X : 0 <-- This is canonical signed max.
22590     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
22591       return SDValue();
22592   } else if (CC == ISD::SETLT) {
22593     // (X <  0) ? A : 0
22594     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
22595     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
22596       return SDValue();
22597   } else {
22598     return SDValue();
22599   }
22600 
22601   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
22602   // constant.
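  // e.g. for i32 X with A == 4: select_cc setlt X, 0, 4, 0 becomes
  // and (srl X, 29), 4, since the sign bit shifts down to bit 2.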
22603   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
22604   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22605   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
22606     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
22607     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
22608       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22609       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
22610       AddToWorklist(Shift.getNode());
22611 
22612       if (XType.bitsGT(AType)) {
22613         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22614         AddToWorklist(Shift.getNode());
22615       }
22616 
22617       if (CC == ISD::SETGT)
22618         Shift = DAG.getNOT(DL, Shift, AType);
22619 
22620       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22621     }
22622   }
22623 
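  // Otherwise use the general form: an arithmetic shift by size(X)-1
  // broadcasts the sign bit (all-ones when X is negative), which is then
  // inverted for the SETGT case and masked with A.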
22624   unsigned ShCt = XType.getSizeInBits() - 1;
22625   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
22626     return SDValue();
22627 
22628   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22629   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
22630   AddToWorklist(Shift.getNode());
22631 
22632   if (XType.bitsGT(AType)) {
22633     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22634     AddToWorklist(Shift.getNode());
22635   }
22636 
22637   if (CC == ISD::SETGT)
22638     Shift = DAG.getNOT(DL, Shift, AType);
22639 
22640   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22641 }
22642 
22643 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
22644 SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
22645   SDValue N0 = N->getOperand(0);
22646   SDValue N1 = N->getOperand(1);
22647   SDValue N2 = N->getOperand(2);
22648   EVT VT = N->getValueType(0);
22649   SDLoc DL(N);
22650 
22651   unsigned BinOpc = N1.getOpcode();
22652   if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
22653     return SDValue();
22654 
22655   // The use checks are intentionally on SDNode because we may be dealing
22656   // with opcodes that produce more than one SDValue.
22657   // TODO: Do we really need to check N0 (the condition operand of the select)?
22658   //       But removing that clause could cause an infinite loop...
22659   if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
22660     return SDValue();
22661 
22662   // Binops may include opcodes that return multiple values, so all values
22663   // must be created/propagated from the newly created binops below.
22664   SDVTList OpVTs = N1->getVTList();
22665 
22666   // Fold select(cond, binop(x, y), binop(z, y))
22667   //  --> binop(select(cond, x, z), y)
22668   if (N1.getOperand(1) == N2.getOperand(1)) {
22669     SDValue NewSel =
22670         DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
22671     SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
22672     NewBinOp->setFlags(N1->getFlags());
22673     NewBinOp->intersectFlagsWith(N2->getFlags());
22674     return NewBinOp;
22675   }
22676 
22677   // Fold select(cond, binop(x, y), binop(x, z))
22678   //  --> binop(x, select(cond, y, z))
22679   // Second op VT might be different (e.g. shift amount type)
22680   if (N1.getOperand(0) == N2.getOperand(0) &&
22681       VT == N1.getOperand(1).getValueType() &&
22682       VT == N2.getOperand(1).getValueType()) {
22683     SDValue NewSel =
22684         DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
22685     SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
22686     NewBinOp->setFlags(N1->getFlags());
22687     NewBinOp->intersectFlagsWith(N2->getFlags());
22688     return NewBinOp;
22689   }
22690 
22691   // TODO: Handle isCommutativeBinOp patterns as well?
22692   return SDValue();
22693 }
22694 
22695 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
22696 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
22697   SDValue N0 = N->getOperand(0);
22698   EVT VT = N->getValueType(0);
22699   bool IsFabs = N->getOpcode() == ISD::FABS;
22700   bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
22701 
22702   if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
22703     return SDValue();
22704 
22705   SDValue Int = N0.getOperand(0);
22706   EVT IntVT = Int.getValueType();
22707 
  // The operand of the cast should be a scalar integer.
22709   if (!IntVT.isInteger() || IntVT.isVector())
22710     return SDValue();
22711 
22712   // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
22713   // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
22714   APInt SignMask;
22715   if (N0.getValueType().isVector()) {
22716     // For vector, create a sign mask (0x80...) or its inverse (for fabs,
22717     // 0x7f...) per element and splat it.
22718     SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
22719     if (IsFabs)
22720       SignMask = ~SignMask;
22721     SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
22722   } else {
22723     // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
22724     SignMask = APInt::getSignMask(IntVT.getSizeInBits());
22725     if (IsFabs)
22726       SignMask = ~SignMask;
22727   }
22728   SDLoc DL(N0);
22729   Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
22730                     DAG.getConstant(SignMask, DL, IntVT));
22731   AddToWorklist(Int.getNode());
22732   return DAG.getBitcast(VT, Int);
22733 }
22734 
/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
22736 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
22737 /// in it. This may be a win when the constant is not otherwise available
22738 /// because it replaces two constant pool loads with one.
22739 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
22740     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
22741     ISD::CondCode CC) {
22742   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
22743     return SDValue();
22744 
22745   // If we are before legalize types, we want the other legalization to happen
22746   // first (for example, to avoid messing with soft float).
22747   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
22748   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
22749   EVT VT = N2.getValueType();
22750   if (!TV || !FV || !TLI.isTypeLegal(VT))
22751     return SDValue();
22752 
22753   // If a constant can be materialized without loads, this does not make sense.
22754   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
22755       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
22756       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
22757     return SDValue();
22758 
22759   // If both constants have multiple uses, then we won't need to do an extra
22760   // load. The values are likely around in registers for other users.
22761   if (!TV->hasOneUse() && !FV->hasOneUse())
22762     return SDValue();
22763 
22764   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
22765                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
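  // Note the array order: the false value sits at offset 0 and the true
  // value at the element-size offset, matching the select of offsets below.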
22766   Type *FPTy = Elts[0]->getType();
22767   const DataLayout &TD = DAG.getDataLayout();
22768 
22769   // Create a ConstantArray of the two constants.
22770   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
22771   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
22772                                       TD.getPrefTypeAlign(FPTy));
22773   Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
22774 
22775   // Get offsets to the 0 and 1 elements of the array, so we can select between
22776   // them.
22777   SDValue Zero = DAG.getIntPtrConstant(0, DL);
22778   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
22779   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
22780   SDValue Cond =
22781       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
22782   AddToWorklist(Cond.getNode());
22783   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
22784   AddToWorklist(CstOffset.getNode());
22785   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
22786   AddToWorklist(CPIdx.getNode());
22787   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
22788                      MachinePointerInfo::getConstantPool(
22789                          DAG.getMachineFunction()), Alignment);
22790 }
22791 
22792 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
22793 /// where 'cond' is the comparison specified by CC.
22794 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
22795                                       SDValue N2, SDValue N3, ISD::CondCode CC,
22796                                       bool NotExtCompare) {
22797   // (x ? y : y) -> y.
22798   if (N2 == N3) return N2;
22799 
22800   EVT CmpOpVT = N0.getValueType();
22801   EVT CmpResVT = getSetCCResultType(CmpOpVT);
22802   EVT VT = N2.getValueType();
22803   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
22804   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22805   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
22806 
22807   // Determine if the condition we're dealing with is constant.
22808   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
22809     AddToWorklist(SCC.getNode());
22810     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
22811       // fold select_cc true, x, y -> x
22812       // fold select_cc false, x, y -> y
22813       return !(SCCC->isZero()) ? N2 : N3;
22814     }
22815   }
22816 
22817   if (SDValue V =
22818           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
22819     return V;
22820 
22821   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
22822     return V;
22823 
  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // In plain terms: we can turn the SELECT_CC into an AND when the condition
  // can be materialized as an all-ones register.  Any single bit-test can be
  // materialized as an all-ones register with shift-left and
  // shift-right-arith.
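  // For example, for i32 x and y == 4 (bit 2 set):
  //   (select_cc seteq (and x, 4), 0, 0, A)
  // becomes (and (sra (shl x, 29), 31), A): the shl moves bit 2 into the sign
  // bit, and the sra then smears it across the register as all-ones or zero.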
22830   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
22831       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
22832     SDValue AndLHS = N0->getOperand(0);
22833     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
22834     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit into the sign bit.
22836       const APInt &AndMask = ConstAndRHS->getAPIntValue();
22837       unsigned ShCt = AndMask.getBitWidth() - 1;
22838       if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
22839         SDValue ShlAmt =
22840           DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
22841                           getShiftAmountTy(AndLHS.getValueType()));
22842         SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
22843 
22844         // Now arithmetic right shift it all the way over, so the result is
22845         // either all-ones, or zero.
22846         SDValue ShrAmt =
22847           DAG.getConstant(ShCt, SDLoc(Shl),
22848                           getShiftAmountTy(Shl.getValueType()));
22849         SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
22850 
22851         return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
22852       }
22853     }
22854   }
22855 
22856   // fold select C, 16, 0 -> shl C, 4
22857   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
22858   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
22859 
22860   if ((Fold || Swap) &&
22861       TLI.getBooleanContents(CmpOpVT) ==
22862           TargetLowering::ZeroOrOneBooleanContent &&
22863       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
22864 
22865     if (Swap) {
22866       CC = ISD::getSetCCInverse(CC, CmpOpVT);
22867       std::swap(N2C, N3C);
22868     }
22869 
22870     // If the caller doesn't want us to simplify this into a zext of a compare,
22871     // don't do it.
22872     if (NotExtCompare && N2C->isOne())
22873       return SDValue();
22874 
22875     SDValue Temp, SCC;
22876     // zext (setcc n0, n1)
22877     if (LegalTypes) {
22878       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
22879       if (VT.bitsLT(SCC.getValueType()))
22880         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
22881       else
22882         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22883     } else {
22884       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
22885       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22886     }
22887 
22888     AddToWorklist(SCC.getNode());
22889     AddToWorklist(Temp.getNode());
22890 
22891     if (N2C->isOne())
22892       return Temp;
22893 
22894     unsigned ShCt = N2C->getAPIntValue().logBase2();
22895     if (TLI.shouldAvoidTransformToShift(VT, ShCt))
22896       return SDValue();
22897 
22898     // shl setcc result by log2 n2c
22899     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
22900                        DAG.getConstant(ShCt, SDLoc(Temp),
22901                                        getShiftAmountTy(Temp.getValueType())));
22902   }
22903 
22904   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
22905   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
22906   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
22907   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
22908   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
22909   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
22910   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
22911   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
22912   if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
22913     SDValue ValueOnZero = N2;
22914     SDValue Count = N3;
    // If the condition is NE instead of EQ, swap the operands.
22916     if (CC == ISD::SETNE)
22917       std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the number of bits in
    // the type.
22919     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
22920       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
22921         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
22922         // legal, combine to just cttz.
22923         if ((Count.getOpcode() == ISD::CTTZ ||
22924              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
22925             N0 == Count.getOperand(0) &&
22926             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
22927           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
22928         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
22929         // legal, combine to just ctlz.
22930         if ((Count.getOpcode() == ISD::CTLZ ||
22931              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
22932             N0 == Count.getOperand(0) &&
22933             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
22934           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
22935       }
22936     }
22937   }
22938 
22939   // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
22940   // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
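  // For example, for i32 X: select_cc setgt X, -1, 5, -6
  //   -> xor (ashr X, 31), 5
  // since -6 == ~5, and (ashr X, 31) is zero when X > -1 and all-ones
  // otherwise.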
22941   if (!NotExtCompare && N1C && N2C && N3C &&
22942       N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
22943       ((N1C->isAllOnes() && CC == ISD::SETGT) ||
22944        (N1C->isZero() && CC == ISD::SETLT)) &&
22945       !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
22946     SDValue ASR = DAG.getNode(
22947         ISD::SRA, DL, CmpOpVT, N0,
22948         DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
22949     return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
22950                        DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
22951   }
22952 
22953   return SDValue();
22954 }
22955 
/// This is a thin wrapper around TargetLowering::SimplifySetCC.
22957 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
22958                                    ISD::CondCode Cond, const SDLoc &DL,
22959                                    bool foldBooleans) {
22960   TargetLowering::DAGCombinerInfo
22961     DagCombineInfo(DAG, Level, false, this);
22962   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
22963 }
22964 
/// Given an ISD::SDIV node expressing a divide by constant, return a DAG
/// expression that will generate the same value by multiplying by a magic
/// number.
22968 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22969 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div to a mul
  // and a shift.
22972   if (DAG.getMachineFunction().getFunction().hasMinSize())
22973     return SDValue();
22974 
22975   SmallVector<SDNode *, 8> Built;
22976   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
22977     for (SDNode *N : Built)
22978       AddToWorklist(N);
22979     return S;
22980   }
22981 
22982   return SDValue();
22983 }
22984 
22985 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
22986 /// DAG expression that will generate the same value by right shifting.
22987 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
22988   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
22989   if (!C)
22990     return SDValue();
22991 
22992   // Avoid division by zero.
22993   if (C->isZero())
22994     return SDValue();
22995 
22996   SmallVector<SDNode *, 8> Built;
22997   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
22998     for (SDNode *N : Built)
22999       AddToWorklist(N);
23000     return S;
23001   }
23002 
23003   return SDValue();
23004 }
23005 
23006 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
23007 /// expression that will generate the same value by multiplying by a magic
23008 /// number.
23009 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
23010 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div to a mul
  // and a shift.
23013   if (DAG.getMachineFunction().getFunction().hasMinSize())
23014     return SDValue();
23015 
23016   SmallVector<SDNode *, 8> Built;
23017   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
23018     for (SDNode *N : Built)
23019       AddToWorklist(N);
23020     return S;
23021   }
23022 
23023   return SDValue();
23024 }
23025 
23026 /// Determines the LogBase2 value for a non-null input value using the
23027 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
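/// For example, for a power-of-2 i32 input V = 16: ctlz(16) = 27, so
/// LogBase2(V) = (32 - 1) - 27 = 4.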
23028 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
23029   EVT VT = V.getValueType();
23030   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
23031   SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
23032   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
23033   return LogBase2;
23034 }
23035 
/// Newton's iteration for a function F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23037 /// For the reciprocal, we need to find the zero of the function:
23038 ///   F(X) = 1/X - A [which has a zero at X = 1/A]
23039 ///     =>
23040 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
23041 ///     does not require additional intermediate precision]
/// For the last iteration, fold in the numerator N to gain more precision:
23043 ///   Result = N X_i + X_i (N - N A X_i)
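/// As a worked example (ignoring the final numerator step), for A = 3.0 with
/// initial estimate X_0 = 0.3: X_1 = 0.3 + 0.3 * (1 - 0.9) = 0.33, then
/// X_2 = 0.3333, converging quadratically toward 1/A = 1/3.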
23044 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
23045                                       SDNodeFlags Flags) {
23046   if (LegalDAG)
23047     return SDValue();
23048 
23049   // TODO: Handle extended types?
23050   EVT VT = Op.getValueType();
23051   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
23052       VT.getScalarType() != MVT::f64)
23053     return SDValue();
23054 
23055   // If estimates are explicitly disabled for this function, we're done.
23056   MachineFunction &MF = DAG.getMachineFunction();
23057   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
23058   if (Enabled == TLI.ReciprocalEstimate::Disabled)
23059     return SDValue();
23060 
23061   // Estimates may be explicitly enabled for this type with a custom number of
23062   // refinement steps.
23063   int Iterations = TLI.getDivRefinementSteps(VT, MF);
23064   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
23065     AddToWorklist(Est.getNode());
23066 
23067     SDLoc DL(Op);
23068     if (Iterations) {
23069       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
23070 
      // Newton iterations: Est = Est + Est (1 - Arg * Est)
23072       // If this is the last iteration, also multiply by the numerator.
23073       for (int i = 0; i < Iterations; ++i) {
23074         SDValue MulEst = Est;
23075 
23076         if (i == Iterations - 1) {
23077           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
23078           AddToWorklist(MulEst.getNode());
23079         }
23080 
23081         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
23082         AddToWorklist(NewEst.getNode());
23083 
23084         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
23085                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
23086         AddToWorklist(NewEst.getNode());
23087 
23088         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23089         AddToWorklist(NewEst.getNode());
23090 
23091         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
23092         AddToWorklist(Est.getNode());
23093       }
23094     } else {
      // If no iterations are available, just multiply by the numerator N.
23096       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
23097       AddToWorklist(Est.getNode());
23098     }
23099 
23100     return Est;
23101   }
23102 
23103   return SDValue();
23104 }
23105 
/// Newton's iteration for a function F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23107 /// For the reciprocal sqrt, we need to find the zero of the function:
23108 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23109 ///     =>
23110 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
23111 /// As a result, we precompute A/2 prior to the iteration loop.
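/// For example, for A = 4.0 with initial estimate X_0 = 0.4:
///   X_1 = 0.4 * (1.5 - 4.0 * 0.16 / 2) = 0.472, then X_2 ~= 0.4977,
/// converging toward 1/sqrt(A) = 0.5.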
23112 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
23113                                          unsigned Iterations,
23114                                          SDNodeFlags Flags, bool Reciprocal) {
23115   EVT VT = Arg.getValueType();
23116   SDLoc DL(Arg);
23117   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
23118 
23119   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
23120   // this entire sequence requires only one FP constant.
23121   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
23122   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
23123 
23124   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
23125   for (unsigned i = 0; i < Iterations; ++i) {
23126     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
23127     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
23128     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
23129     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23130   }
23131 
23132   // If non-reciprocal square root is requested, multiply the result by Arg.
23133   if (!Reciprocal)
23134     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
23135 
23136   return Est;
23137 }
23138 
/// Newton's iteration for a function F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23140 /// For the reciprocal sqrt, we need to find the zero of the function:
23141 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23142 ///     =>
23143 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
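/// This two-constant form is algebraically identical to the one-constant form
/// above, since (-0.5 * X_i) * (A * X_i^2 - 3.0) = X_i * (1.5 - A * X_i^2 / 2);
/// e.g. for A = 4.0 and X_0 = 0.4: (-0.2) * (0.64 - 3.0) = 0.472.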
23144 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
23145                                          unsigned Iterations,
23146                                          SDNodeFlags Flags, bool Reciprocal) {
23147   EVT VT = Arg.getValueType();
23148   SDLoc DL(Arg);
23149   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
23150   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
23151 
23152   // This routine must enter the loop below to work correctly
23153   // when (Reciprocal == false).
23154   assert(Iterations > 0);
23155 
23156   // Newton iterations for reciprocal square root:
23157   // E = (E * -0.5) * ((A * E) * E + -3.0)
23158   for (unsigned i = 0; i < Iterations; ++i) {
23159     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
23160     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
23161     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
23162 
    // When calculating a square root, at the last iteration build:
    // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (notice the common subexpression (A * E))
23166     SDValue LHS;
23167     if (Reciprocal || (i + 1) < Iterations) {
23168       // RSQRT: LHS = (E * -0.5)
23169       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
23170     } else {
23171       // SQRT: LHS = (A * E) * -0.5
23172       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
23173     }
23174 
23175     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
23176   }
23177 
23178   return Est;
23179 }
23180 
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case,
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
23183 /// Op can be zero.
23184 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
23185                                            bool Reciprocal) {
23186   if (LegalDAG)
23187     return SDValue();
23188 
23189   // TODO: Handle extended types?
23190   EVT VT = Op.getValueType();
23191   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
23192       VT.getScalarType() != MVT::f64)
23193     return SDValue();
23194 
23195   // If estimates are explicitly disabled for this function, we're done.
23196   MachineFunction &MF = DAG.getMachineFunction();
23197   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
23198   if (Enabled == TLI.ReciprocalEstimate::Disabled)
23199     return SDValue();
23200 
23201   // Estimates may be explicitly enabled for this type with a custom number of
23202   // refinement steps.
23203   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
23204 
23205   bool UseOneConstNR = false;
23206   if (SDValue Est =
23207       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
23208                           Reciprocal)) {
23209     AddToWorklist(Est.getNode());
23210 
23211     if (Iterations)
23212       Est = UseOneConstNR
23213             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
23214             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
23215     if (!Reciprocal) {
23216       SDLoc DL(Op);
23217       // Try the target specific test first.
23218       SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
23219 
23220       // The estimate is now completely wrong if the input was exactly 0.0 or
      // possibly a denormal. Force the answer to 0.0 or the value provided by
      // the target for those cases.
23223       Est = DAG.getNode(
23224           Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
23225           Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
23226     }
23227     return Est;
23228   }
23229 
23230   return SDValue();
23231 }
23232 
23233 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23234   return buildSqrtEstimateImpl(Op, Flags, true);
23235 }
23236 
23237 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23238   return buildSqrtEstimateImpl(Op, Flags, false);
23239 }
23240 
23241 /// Return true if there is any possibility that the two addresses overlap.
23242 bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
23243 
23244   struct MemUseCharacteristics {
23245     bool IsVolatile;
23246     bool IsAtomic;
23247     SDValue BasePtr;
23248     int64_t Offset;
23249     Optional<int64_t> NumBytes;
23250     MachineMemOperand *MMO;
23251   };
23252 
23253   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
23254     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
23255       int64_t Offset = 0;
23256       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
23257         Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
23258                      ? C->getSExtValue()
23259                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
23260                            ? -1 * C->getSExtValue()
23261                            : 0;
23262       uint64_t Size =
23263           MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
23264       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
23265               Offset /*base offset*/,
23266               Optional<int64_t>(Size),
23267               LSN->getMemOperand()};
23268     }
    if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
23270       return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
23271               (LN->hasOffset()) ? LN->getOffset() : 0,
23272               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
23273                                 : Optional<int64_t>(),
23274               (MachineMemOperand *)nullptr};
23275     // Default.
23276     return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
23277             (int64_t)0 /*offset*/,
23278             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
23279   };
23280 
23281   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
23282                         MUC1 = getCharacteristics(Op1);
23283 
23284   // If they are to the same address, then they must be aliases.
23285   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
23286       MUC0.Offset == MUC1.Offset)
23287     return true;
23288 
23289   // If they are both volatile then they cannot be reordered.
23290   if (MUC0.IsVolatile && MUC1.IsVolatile)
23291     return true;
23292 
23293   // Be conservative about atomics for the moment
23294   // TODO: This is way overconservative for unordered atomics (see D66309)
23295   if (MUC0.IsAtomic && MUC1.IsAtomic)
23296     return true;
23297 
23298   if (MUC0.MMO && MUC1.MMO) {
23299     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23300         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23301       return false;
23302   }
23303 
23304   // Try to prove that there is aliasing, or that there is no aliasing. Either
23305   // way, we can return now. If nothing can be proved, proceed with more tests.
23306   bool IsAlias;
23307   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
23308                                        DAG, IsAlias))
23309     return IsAlias;
23310 
23311   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
23312   // either are not known.
23313   if (!MUC0.MMO || !MUC1.MMO)
23314     return true;
23315 
23316   // If one operation reads from invariant memory, and the other may store, they
23317   // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load/store.
23319   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23320       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23321     return false;
23322 
  // If we know that SrcValue1 and SrcValue2 have relatively large alignment
  // compared to the size and offset of the access, we may be able to prove
  // that they do not alias. This check is conservative for now to catch cases
  // created by splitting vector types; it only works when the offsets are
  // multiples of the size of the data.
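  // For example, two 4-byte accesses with base alignment 8 at offsets 0 and 4
  // occupy disjoint halves of every aligned 8-byte window, so they cannot
  // overlap.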
23328   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
23329   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
23330   Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
23331   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
23332   auto &Size0 = MUC0.NumBytes;
23333   auto &Size1 = MUC1.NumBytes;
23334   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
23335       Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
23336       OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
23337       SrcValOffset1 % *Size1 == 0) {
23338     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
23339     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
23340 
23341     // There is no overlap between these relatively aligned accesses of
23342     // similar size. Return no alias.
23343     if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
23344       return false;
23345   }
23346 
23347   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
23348                    ? CombinerGlobalAA
23349                    : DAG.getSubtarget().useAA();
23350 #ifndef NDEBUG
23351   if (CombinerAAOnlyFunc.getNumOccurrences() &&
23352       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
23353     UseAA = false;
23354 #endif
23355 
23356   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
23357       Size0.hasValue() && Size1.hasValue()) {
23358     // Use alias analysis information.
23359     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
23360     int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
23361     int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
23362     if (AA->isNoAlias(
23363             MemoryLocation(MUC0.MMO->getValue(), Overlap0,
23364                            UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
23365             MemoryLocation(MUC1.MMO->getValue(), Overlap1,
23366                            UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
23367       return false;
23368   }
23369 
23370   // Otherwise we have to assume they alias.
23371   return true;
23372 }
23373 
23374 /// Walk up chain skipping non-aliasing memory nodes,
23375 /// looking for aliasing nodes and adding them to the Aliases vector.
23376 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
23377                                    SmallVectorImpl<SDValue> &Aliases) {
23378   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
23379   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
23380 
23381   // Get alias information for node.
23382   // TODO: relax aliasing for unordered atomics (see D66309)
23383   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
23384 
23385   // Starting off.
23386   Chains.push_back(OriginalChain);
23387   unsigned Depth = 0;
23388 
23389   // Attempt to improve chain by a single step
23390   std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
23391     switch (C.getOpcode()) {
23392     case ISD::EntryToken:
23393       // No need to mark EntryToken.
23394       C = SDValue();
23395       return true;
23396     case ISD::LOAD:
23397     case ISD::STORE: {
23398       // Get alias information for C.
23399       // TODO: Relax aliasing for unordered atomics (see D66309)
23400       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
23401                       cast<LSBaseSDNode>(C.getNode())->isSimple();
23402       if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
23403         // Look further up the chain.
23404         C = C.getOperand(0);
23405         return true;
23406       }
23407       // Alias, so stop here.
23408       return false;
23409     }
23410 
23411     case ISD::CopyFromReg:
      // Always forward past CopyFromReg.
23413       C = C.getOperand(0);
23414       return true;
23415 
23416     case ISD::LIFETIME_START:
23417     case ISD::LIFETIME_END: {
23418       // We can forward past any lifetime start/end that can be proven not to
23419       // alias the memory access.
23420       if (!mayAlias(N, C.getNode())) {
23421         // Look further up the chain.
23422         C = C.getOperand(0);
23423         return true;
23424       }
23425       return false;
23426     }
23427     default:
23428       return false;
23429     }
23430   };
23431 
23432   // Look at each chain and determine if it is an alias.  If so, add it to the
23433   // aliases list.  If not, then continue up the chain looking for the next
23434   // candidate.
23435   while (!Chains.empty()) {
23436     SDValue Chain = Chains.pop_back_val();
23437 
23438     // Don't bother if we've seen Chain before.
23439     if (!Visited.insert(Chain.getNode()).second)
23440       continue;
23441 
23442     // For TokenFactor nodes, look at each operand and only continue up the
23443     // chain until we reach the depth limit.
23444     //
23445     // FIXME: The depth check could be made to return the last non-aliasing
23446     // chain we found before we hit a tokenfactor rather than the original
23447     // chain.
23448     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
23449       Aliases.clear();
23450       Aliases.push_back(OriginalChain);
23451       return;
23452     }
23453 
23454     if (Chain.getOpcode() == ISD::TokenFactor) {
23455       // We have to check each of the operands of the token factor for "small"
23456       // token factors, so we queue them up.  Adding the operands to the queue
23457       // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE).
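      // (For example, a token factor with operands (A, B, C) is pushed as
      // C, B, A, so the operands pop off the stack in the original A, B, C
      // order.)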
23459       if (Chain.getNumOperands() > 16) {
23460         Aliases.push_back(Chain);
23461         continue;
23462       }
23463       for (unsigned n = Chain.getNumOperands(); n;)
23464         Chains.push_back(Chain.getOperand(--n));
23465       ++Depth;
23466       continue;
23467     }
23468     // Everything else
23469     if (ImproveChain(Chain)) {
      // Updated chain found; consider the new chain if one exists.
23471       if (Chain.getNode())
23472         Chains.push_back(Chain);
23473       ++Depth;
23474       continue;
23475     }
    // No improved chain possible, so treat this chain as an alias.
23477     Aliases.push_back(Chain);
23478   }
23479 }
23480 
23481 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
/// (aliasing node).
23483 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
23484   if (OptLevel == CodeGenOpt::None)
23485     return OldChain;
23486 
23487   // Ops for replacing token factor.
23488   SmallVector<SDValue, 8> Aliases;
23489 
23490   // Accumulate all the aliases to this node.
23491   GatherAllAliases(N, OldChain, Aliases);
23492 
  // If there are no operands, chain to the entry token.
23494   if (Aliases.size() == 0)
23495     return DAG.getEntryNode();
23496 
  // If there is a single operand, chain to it.  We don't need to revisit it.
23498   if (Aliases.size() == 1)
23499     return Aliases[0];
23500 
23501   // Construct a custom tailored token factor.
23502   return DAG.getTokenFactor(SDLoc(N), Aliases);
23503 }
23504 
23505 namespace {
// TODO: Replace with std::monostate when we move to C++17.
23507 struct UnitT { } Unit;
23508 bool operator==(const UnitT &, const UnitT &) { return true; }
23509 bool operator!=(const UnitT &, const UnitT &) { return false; }
23510 } // namespace
23511 
23512 // This function tries to collect a bunch of potentially interesting
23513 // nodes to improve the chains of, all at once. This might seem
23514 // redundant, as this function gets called when visiting every store
23515 // node, so why not let the work be done on each store as it's visited?
23516 //
23517 // I believe this is mainly important because mergeConsecutiveStores
23518 // is unable to deal with merging stores of different sizes, so unless
23519 // we improve the chains of all the potential candidates up-front
23520 // before running mergeConsecutiveStores, it might only see some of
23521 // the nodes that will eventually be candidates, and then not be able
23522 // to go from a partially-merged state to the desired final
23523 // fully-merged state.
23524 
23525 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
23526   SmallVector<StoreSDNode *, 8> ChainedStores;
23527   StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, every store writes to the immediately preceding address,
  // and is thus merged with the previous interval at insertion time.
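  // For example, if St stores 4 bytes at offset 0 and the chained stores cover
  // offsets -4, -8 and -12, the map coalesces them into the single half-open
  // interval [-12, 4).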
23531 
23532   using IMap =
23533       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
23534   IMap::Allocator A;
23535   IMap Intervals(A);
23536 
23537   // This holds the base pointer, index, and the offset in bytes from the base
23538   // pointer.
23539   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23540 
23541   // We must have a base and an offset.
23542   if (!BasePtr.getBase().getNode())
23543     return false;
23544 
23545   // Do not handle stores to undef base pointers.
23546   if (BasePtr.getBase().isUndef())
23547     return false;
23548 
23549   // Do not handle stores to opaque types
23550   if (St->getMemoryVT().isZeroSized())
23551     return false;
23552 
23553   // BaseIndexOffset assumes that offsets are fixed-size, which
23554   // is not valid for scalable vectors where the offsets are
23555   // scaled by `vscale`, so bail out early.
23556   if (St->getMemoryVT().isScalableVector())
23557     return false;
23558 
23559   // Add ST's interval.
23560   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
23561 
23562   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
23563     if (Chain->getMemoryVT().isScalableVector())
23564       return false;
23565 
23566     // If the chain has more than one use, then we can't reorder the mem ops.
23567     if (!SDValue(Chain, 0)->hasOneUse())
23568       break;
23569     // TODO: Relax for unordered atomics (see D66309)
23570     if (!Chain->isSimple() || Chain->isIndexed())
23571       break;
23572 
23573     // Find the base pointer and offset for this memory node.
23574     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
23575     // Check that the base pointer is the same as the original one.
23576     int64_t Offset;
23577     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
23578       break;
23579     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
23580     // Make sure we don't overlap with other intervals by checking the ones to
23581     // the left or right before inserting.
23582     auto I = Intervals.find(Offset);
23583     // If there's a next interval, we should end before it.
23584     if (I != Intervals.end() && I.start() < (Offset + Length))
23585       break;
23586     // If there's a previous interval, we should start after it.
23587     if (I != Intervals.begin() && (--I).stop() <= Offset)
23588       break;
23589     Intervals.insert(Offset, Offset + Length, Unit);
23590 
23591     ChainedStores.push_back(Chain);
23592     STChain = Chain;
23593   }
23594 
23595   // If we didn't find a chained store, exit.
23596   if (ChainedStores.size() == 0)
23597     return false;
23598 
23599   // Improve all chained stores (St and ChainedStores members) starting from
  // where the store chain ended and return a single TokenFactor.
23601   SDValue NewChain = STChain->getChain();
23602   SmallVector<SDValue, 8> TFOps;
23603   for (unsigned I = ChainedStores.size(); I;) {
23604     StoreSDNode *S = ChainedStores[--I];
23605     SDValue BetterChain = FindBetterChain(S, NewChain);
23606     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
23607         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
23608     TFOps.push_back(SDValue(S, 0));
23609     ChainedStores[I] = S;
23610   }
23611 
23612   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
23613   SDValue BetterChain = FindBetterChain(St, NewChain);
23614   SDValue NewST;
23615   if (St->isTruncatingStore())
23616     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
23617                               St->getBasePtr(), St->getMemoryVT(),
23618                               St->getMemOperand());
23619   else
23620     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
23621                          St->getBasePtr(), St->getMemOperand());
23622 
23623   TFOps.push_back(NewST);
23624 
23625   // If we improved every element of TFOps, then we've lost the dependence on
23626   // NewChain to successors of St and we need to add it back to TFOps. Do so at
23627   // the beginning to keep relative order consistent with FindBetterChains.
23628   auto hasImprovedChain = [&](SDValue ST) -> bool {
23629     return ST->getOperand(0) != NewChain;
23630   };
23631   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
23632   if (AddNewChain)
23633     TFOps.insert(TFOps.begin(), NewChain);
23634 
23635   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
23636   CombineTo(St, TF);
23637 
23638   // Add TF and its operands to the worklist.
23639   AddToWorklist(TF.getNode());
23640   for (const SDValue &Op : TF->ops())
23641     AddToWorklist(Op.getNode());
23642   AddToWorklist(STChain);
23643   return true;
23644 }
23645 
23646 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
23647   if (OptLevel == CodeGenOpt::None)
23648     return false;
23649 
23650   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23651 
23652   // We must have a base and an offset.
23653   if (!BasePtr.getBase().getNode())
23654     return false;
23655 
23656   // Do not handle stores to undef base pointers.
23657   if (BasePtr.getBase().isUndef())
23658     return false;
23659 
23660   // Directly improve a chain of disjoint stores starting at St.
23661   if (parallelizeChainedStores(St))
23662     return true;
23663 
  // Improve St's chain.
23665   SDValue BetterChain = FindBetterChain(St, St->getChain());
23666   if (St->getChain() != BetterChain) {
23667     replaceStoreChain(St, BetterChain);
23668     return true;
23669   }
23670   return false;
23671 }
23672 
23673 /// This is the entry point for the file.
23674 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
23675                            CodeGenOpt::Level OptLevel) {
  // This is the main entry point to this class.
23677   DAGCombiner(*this, AA, OptLevel).Run(Level);
23678 }
23679