1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallBitVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/MemoryLocation.h"
34 #include "llvm/Analysis/VectorUtils.h"
35 #include "llvm/CodeGen/DAGCombine.h"
36 #include "llvm/CodeGen/ISDOpcodes.h"
37 #include "llvm/CodeGen/MachineFrameInfo.h"
38 #include "llvm/CodeGen/MachineFunction.h"
39 #include "llvm/CodeGen/MachineMemOperand.h"
40 #include "llvm/CodeGen/RuntimeLibcalls.h"
41 #include "llvm/CodeGen/SelectionDAG.h"
42 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
43 #include "llvm/CodeGen/SelectionDAGNodes.h"
44 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
45 #include "llvm/CodeGen/TargetLowering.h"
46 #include "llvm/CodeGen/TargetRegisterInfo.h"
47 #include "llvm/CodeGen/TargetSubtargetInfo.h"
48 #include "llvm/CodeGen/ValueTypes.h"
49 #include "llvm/IR/Attributes.h"
50 #include "llvm/IR/Constant.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Function.h"
54 #include "llvm/IR/LLVMContext.h"
55 #include "llvm/IR/Metadata.h"
56 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/CodeGen.h"
58 #include "llvm/Support/CommandLine.h"
59 #include "llvm/Support/Compiler.h"
60 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/ErrorHandling.h"
62 #include "llvm/Support/KnownBits.h"
63 #include "llvm/Support/MachineValueType.h"
64 #include "llvm/Support/MathExtras.h"
65 #include "llvm/Support/raw_ostream.h"
66 #include "llvm/Target/TargetMachine.h"
67 #include "llvm/Target/TargetOptions.h"
68 #include <algorithm>
69 #include <cassert>
70 #include <cstdint>
71 #include <functional>
72 #include <iterator>
73 #include <string>
74 #include <tuple>
75 #include <utility>
76 
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "dagcombine"
80 
// Per-transformation counters for this pass (LLVM STATISTIC machinery).
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
88 
// Developer (cl::Hidden) knobs controlling individual DAG-combiner features.

// Allow the combiner to query IR-level alias analysis.
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

// Allow the combiner to use type-based alias analysis metadata.
static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
// Debug builds only: restrict combiner alias analysis to the named function.
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

// Permit splitting the indexing computation away from a load.
static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));

// Master switch for merging adjacent stores into one wider store.
static cl::opt<bool>
    EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
                       cl::desc("DAG combiner enable merging multiple stores "
                                "into a wider store"));

// Cap on TokenFactor operand inlining (see visitTokenFactor).
static cl::opt<unsigned> TokenFactorInlineLimit(
    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
    cl::desc("Limit the number of operands to inline for Token Factors"));

// After this many dependence-check bailouts for the same (StoreNode,
// RootNode) pair, stop considering that pair for store merging.
static cl::opt<unsigned> StoreMergeDependenceLimit(
    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
    cl::desc("Limit the number of times for the same StoreNode and RootNode "
             "to bail out in store merging dependence check"));
128 
129 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
130     "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
131     cl::desc("DAG cominber enable reducing the width of load/op/store "
132              "sequence"));
133 
134 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
135     "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
136     cl::desc("DAG cominber enable load/<replace bytes>/store with "
137              "a narrower store"));
138 
139 namespace {
140 
141   class DAGCombiner {
142     SelectionDAG &DAG;
143     const TargetLowering &TLI;
144     const SelectionDAGTargetInfo *STI;
145     CombineLevel Level;
146     CodeGenOpt::Level OptLevel;
147     bool LegalDAG = false;
148     bool LegalOperations = false;
149     bool LegalTypes = false;
150     bool ForCodeSize;
151     bool DisableGenericCombines;
152 
153     /// Worklist of all of the nodes that need to be simplified.
154     ///
155     /// This must behave as a stack -- new nodes to process are pushed onto the
156     /// back and when processing we pop off of the back.
157     ///
158     /// The worklist will not contain duplicates but may contain null entries
159     /// due to nodes being deleted from the underlying DAG.
160     SmallVector<SDNode *, 64> Worklist;
161 
162     /// Mapping from an SDNode to its position on the worklist.
163     ///
164     /// This is used to find and remove nodes from the worklist (by nulling
165     /// them) when they are deleted from the underlying DAG. It relies on
166     /// stable indices of nodes within the worklist.
167     DenseMap<SDNode *, unsigned> WorklistMap;
    /// This records all nodes attempted to be added to the worklist since we
    /// considered a new worklist entry. Since we do not add duplicate nodes
    /// to the worklist, this is different from the tail of the worklist.
171     SmallSetVector<SDNode *, 32> PruningList;
172 
173     /// Set of nodes which have been combined (at least once).
174     ///
175     /// This is used to allow us to reliably add any operands of a DAG node
176     /// which have not yet been combined to the worklist.
177     SmallPtrSet<SDNode *, 32> CombinedNodes;
178 
179     /// Map from candidate StoreNode to the pair of RootNode and count.
180     /// The count is used to track how many times we have seen the StoreNode
181     /// with the same RootNode bail out in dependence check. If we have seen
182     /// the bail out for the same pair many times over a limit, we won't
183     /// consider the StoreNode with the same RootNode as store merging
184     /// candidate again.
185     DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
186 
187     // AA - Used for DAG load/store alias analysis.
188     AliasAnalysis *AA;
189 
190     /// When an instruction is simplified, add all users of the instruction to
191     /// the work lists because they might get more simplified now.
192     void AddUsersToWorklist(SDNode *N) {
193       for (SDNode *Node : N->uses())
194         AddToWorklist(Node);
195     }
196 
    /// Convenient shorthand to add a node and all of its users to the worklist.
198     void AddToWorklistWithUsers(SDNode *N) {
199       AddUsersToWorklist(N);
200       AddToWorklist(N);
201     }
202 
203     // Prune potentially dangling nodes. This is called after
204     // any visit to a node, but should also be called during a visit after any
205     // failed combine which may have created a DAG node.
206     void clearAddedDanglingWorklistEntries() {
207       // Check any nodes added to the worklist to see if they are prunable.
208       while (!PruningList.empty()) {
209         auto *N = PruningList.pop_back_val();
210         if (N->use_empty())
211           recursivelyDeleteUnusedNodes(N);
212       }
213     }
214 
215     SDNode *getNextWorklistEntry() {
216       // Before we do any work, remove nodes that are not in use.
217       clearAddedDanglingWorklistEntries();
218       SDNode *N = nullptr;
219       // The Worklist holds the SDNodes in order, but it may contain null
220       // entries.
221       while (!N && !Worklist.empty()) {
222         N = Worklist.pop_back_val();
223       }
224 
225       if (N) {
226         bool GoodWorklistEntry = WorklistMap.erase(N);
227         (void)GoodWorklistEntry;
228         assert(GoodWorklistEntry &&
229                "Found a worklist entry without a corresponding map entry!");
230       }
231       return N;
232     }
233 
234     /// Call the node-specific routine that folds each particular type of node.
235     SDValue visit(SDNode *N);
236 
237   public:
238     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
239         : DAG(D), TLI(D.getTargetLoweringInfo()),
240           STI(D.getSubtarget().getSelectionDAGInfo()),
241           Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
242       ForCodeSize = DAG.shouldOptForSize();
243       DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
244 
245       MaximumLegalStoreInBits = 0;
246       // We use the minimum store size here, since that's all we can guarantee
247       // for the scalable vector types.
248       for (MVT VT : MVT::all_valuetypes())
249         if (EVT(VT).isSimple() && VT != MVT::Other &&
250             TLI.isTypeLegal(EVT(VT)) &&
251             VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
252           MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
253     }
254 
255     void ConsiderForPruning(SDNode *N) {
256       // Mark this for potential pruning.
257       PruningList.insert(N);
258     }
259 
    /// Add to the worklist, making sure its instance is at the back (next to
    /// be processed).
262     void AddToWorklist(SDNode *N) {
263       assert(N->getOpcode() != ISD::DELETED_NODE &&
264              "Deleted Node added to Worklist");
265 
266       // Skip handle nodes as they can't usefully be combined and confuse the
267       // zero-use deletion strategy.
268       if (N->getOpcode() == ISD::HANDLENODE)
269         return;
270 
271       ConsiderForPruning(N);
272 
273       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
274         Worklist.push_back(N);
275     }
276 
277     /// Remove all instances of N from the worklist.
278     void removeFromWorklist(SDNode *N) {
279       CombinedNodes.erase(N);
280       PruningList.remove(N);
281       StoreRootCountMap.erase(N);
282 
283       auto It = WorklistMap.find(N);
284       if (It == WorklistMap.end())
285         return; // Not in the worklist.
286 
287       // Null out the entry rather than erasing it to avoid a linear operation.
288       Worklist[It->second] = nullptr;
289       WorklistMap.erase(It);
290     }
291 
292     void deleteAndRecombine(SDNode *N);
293     bool recursivelyDeleteUnusedNodes(SDNode *N);
294 
295     /// Replaces all uses of the results of one DAG node with new values.
296     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
297                       bool AddTo = true);
298 
299     /// Replaces all uses of the results of one DAG node with new values.
300     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
301       return CombineTo(N, &Res, 1, AddTo);
302     }
303 
304     /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      // Two-result convenience form of the array overload.
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }
310 
311     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
312 
313   private:
314     unsigned MaximumLegalStoreInBits;
315 
316     /// Check the specified integer node value to see if it can be simplified or
317     /// if things it uses can be simplified by bit propagation.
318     /// If so, return true.
319     bool SimplifyDemandedBits(SDValue Op) {
320       unsigned BitWidth = Op.getScalarValueSizeInBits();
321       APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
322       return SimplifyDemandedBits(Op, DemandedBits);
323     }
324 
325     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
326       TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
327       KnownBits Known;
328       if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
329         return false;
330 
331       // Revisit the node.
332       AddToWorklist(Op.getNode());
333 
334       CommitTargetLoweringOpt(TLO);
335       return true;
336     }
337 
338     /// Check the specified vector node value to see if it can be simplified or
339     /// if things it uses can be simplified as it only uses some of the
340     /// elements. If so, return true.
341     bool SimplifyDemandedVectorElts(SDValue Op) {
342       // TODO: For now just pretend it cannot be simplified.
343       if (Op.getValueType().isScalableVector())
344         return false;
345 
346       unsigned NumElts = Op.getValueType().getVectorNumElements();
347       APInt DemandedElts = APInt::getAllOnesValue(NumElts);
348       return SimplifyDemandedVectorElts(Op, DemandedElts);
349     }
350 
351     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
352                               const APInt &DemandedElts,
353                               bool AssumeSingleUse = false);
354     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
355                                     bool AssumeSingleUse = false);
356 
357     bool CombineToPreIndexedLoadStore(SDNode *N);
358     bool CombineToPostIndexedLoadStore(SDNode *N);
359     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
360     bool SliceUpLoad(SDNode *N);
361 
362     // Scalars have size 0 to distinguish from singleton vectors.
363     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
364     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
365     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
366 
367     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
368     ///   load.
369     ///
370     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
371     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
372     /// \param EltNo index of the vector element to load.
373     /// \param OriginalLoad load that EVE came from to be replaced.
374     /// \returns EVE on success SDValue() on failure.
375     SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
376                                          SDValue EltNo,
377                                          LoadSDNode *OriginalLoad);
378     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
379     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
380     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
381     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
382     SDValue PromoteIntBinOp(SDValue Op);
383     SDValue PromoteIntShiftOp(SDValue Op);
384     SDValue PromoteExtend(SDValue Op);
385     bool PromoteLoad(SDValue Op);
386 
387     /// Call the node-specific routine that knows how to fold each
388     /// particular type of node. If that doesn't do anything, try the
389     /// target-specific DAG combines.
390     SDValue combine(SDNode *N);
391 
392     // Visitation implementation - Implement dag node combining for different
393     // node types.  The semantics are as follows:
394     // Return Value:
395     //   SDValue.getNode() == 0 - No change was made
396     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
397     //   otherwise              - N should be replaced by the returned Operand.
398     //
399     SDValue visitTokenFactor(SDNode *N);
400     SDValue visitMERGE_VALUES(SDNode *N);
401     SDValue visitADD(SDNode *N);
402     SDValue visitADDLike(SDNode *N);
403     SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
404     SDValue visitSUB(SDNode *N);
405     SDValue visitADDSAT(SDNode *N);
406     SDValue visitSUBSAT(SDNode *N);
407     SDValue visitADDC(SDNode *N);
408     SDValue visitADDO(SDNode *N);
409     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
410     SDValue visitSUBC(SDNode *N);
411     SDValue visitSUBO(SDNode *N);
412     SDValue visitADDE(SDNode *N);
413     SDValue visitADDCARRY(SDNode *N);
414     SDValue visitSADDO_CARRY(SDNode *N);
415     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
416     SDValue visitSUBE(SDNode *N);
417     SDValue visitSUBCARRY(SDNode *N);
418     SDValue visitSSUBO_CARRY(SDNode *N);
419     SDValue visitMUL(SDNode *N);
420     SDValue visitMULFIX(SDNode *N);
421     SDValue useDivRem(SDNode *N);
422     SDValue visitSDIV(SDNode *N);
423     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
424     SDValue visitUDIV(SDNode *N);
425     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
426     SDValue visitREM(SDNode *N);
427     SDValue visitMULHU(SDNode *N);
428     SDValue visitMULHS(SDNode *N);
429     SDValue visitSMUL_LOHI(SDNode *N);
430     SDValue visitUMUL_LOHI(SDNode *N);
431     SDValue visitMULO(SDNode *N);
432     SDValue visitIMINMAX(SDNode *N);
433     SDValue visitAND(SDNode *N);
434     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
435     SDValue visitOR(SDNode *N);
436     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
437     SDValue visitXOR(SDNode *N);
438     SDValue SimplifyVBinOp(SDNode *N);
439     SDValue visitSHL(SDNode *N);
440     SDValue visitSRA(SDNode *N);
441     SDValue visitSRL(SDNode *N);
442     SDValue visitFunnelShift(SDNode *N);
443     SDValue visitRotate(SDNode *N);
444     SDValue visitABS(SDNode *N);
445     SDValue visitBSWAP(SDNode *N);
446     SDValue visitBITREVERSE(SDNode *N);
447     SDValue visitCTLZ(SDNode *N);
448     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
449     SDValue visitCTTZ(SDNode *N);
450     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
451     SDValue visitCTPOP(SDNode *N);
452     SDValue visitSELECT(SDNode *N);
453     SDValue visitVSELECT(SDNode *N);
454     SDValue visitSELECT_CC(SDNode *N);
455     SDValue visitSETCC(SDNode *N);
456     SDValue visitSETCCCARRY(SDNode *N);
457     SDValue visitSIGN_EXTEND(SDNode *N);
458     SDValue visitZERO_EXTEND(SDNode *N);
459     SDValue visitANY_EXTEND(SDNode *N);
460     SDValue visitAssertExt(SDNode *N);
461     SDValue visitAssertAlign(SDNode *N);
462     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
463     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
464     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
465     SDValue visitTRUNCATE(SDNode *N);
466     SDValue visitBITCAST(SDNode *N);
467     SDValue visitFREEZE(SDNode *N);
468     SDValue visitBUILD_PAIR(SDNode *N);
469     SDValue visitFADD(SDNode *N);
470     SDValue visitSTRICT_FADD(SDNode *N);
471     SDValue visitFSUB(SDNode *N);
472     SDValue visitFMUL(SDNode *N);
473     SDValue visitFMA(SDNode *N);
474     SDValue visitFDIV(SDNode *N);
475     SDValue visitFREM(SDNode *N);
476     SDValue visitFSQRT(SDNode *N);
477     SDValue visitFCOPYSIGN(SDNode *N);
478     SDValue visitFPOW(SDNode *N);
479     SDValue visitSINT_TO_FP(SDNode *N);
480     SDValue visitUINT_TO_FP(SDNode *N);
481     SDValue visitFP_TO_SINT(SDNode *N);
482     SDValue visitFP_TO_UINT(SDNode *N);
483     SDValue visitFP_ROUND(SDNode *N);
484     SDValue visitFP_EXTEND(SDNode *N);
485     SDValue visitFNEG(SDNode *N);
486     SDValue visitFABS(SDNode *N);
487     SDValue visitFCEIL(SDNode *N);
488     SDValue visitFTRUNC(SDNode *N);
489     SDValue visitFFLOOR(SDNode *N);
490     SDValue visitFMINNUM(SDNode *N);
491     SDValue visitFMAXNUM(SDNode *N);
492     SDValue visitFMINIMUM(SDNode *N);
493     SDValue visitFMAXIMUM(SDNode *N);
494     SDValue visitBRCOND(SDNode *N);
495     SDValue visitBR_CC(SDNode *N);
496     SDValue visitLOAD(SDNode *N);
497 
498     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
499     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
500 
501     SDValue visitSTORE(SDNode *N);
502     SDValue visitLIFETIME_END(SDNode *N);
503     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
504     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
505     SDValue visitBUILD_VECTOR(SDNode *N);
506     SDValue visitCONCAT_VECTORS(SDNode *N);
507     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
508     SDValue visitVECTOR_SHUFFLE(SDNode *N);
509     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
510     SDValue visitINSERT_SUBVECTOR(SDNode *N);
511     SDValue visitMLOAD(SDNode *N);
512     SDValue visitMSTORE(SDNode *N);
513     SDValue visitMGATHER(SDNode *N);
514     SDValue visitMSCATTER(SDNode *N);
515     SDValue visitFP_TO_FP16(SDNode *N);
516     SDValue visitFP16_TO_FP(SDNode *N);
517     SDValue visitVECREDUCE(SDNode *N);
518 
519     SDValue visitFADDForFMACombine(SDNode *N);
520     SDValue visitFSUBForFMACombine(SDNode *N);
521     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
522 
523     SDValue XformToShuffleWithZero(SDNode *N);
524     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
525                                                     const SDLoc &DL, SDValue N0,
526                                                     SDValue N1);
527     SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
528                                       SDValue N1);
529     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
530                            SDValue N1, SDNodeFlags Flags);
531 
532     SDValue visitShiftByConstant(SDNode *N);
533 
534     SDValue foldSelectOfConstants(SDNode *N);
535     SDValue foldVSelectOfConstants(SDNode *N);
536     SDValue foldBinOpIntoSelect(SDNode *BO);
537     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
538     SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
539     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
540     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
541                              SDValue N2, SDValue N3, ISD::CondCode CC,
542                              bool NotExtCompare = false);
543     SDValue convertSelectOfFPConstantsToLoadOffset(
544         const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
545         ISD::CondCode CC);
546     SDValue foldSignChangeInBitcast(SDNode *N);
547     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
548                                    SDValue N2, SDValue N3, ISD::CondCode CC);
549     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
550                               const SDLoc &DL);
551     SDValue unfoldMaskedMerge(SDNode *N);
552     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
553     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
554                           const SDLoc &DL, bool foldBooleans);
555     SDValue rebuildSetCC(SDValue N);
556 
557     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
558                            SDValue &CC, bool MatchStrict = false) const;
559     bool isOneUseSetCC(SDValue N) const;
560 
561     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
562                                          unsigned HiOp);
563     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
564     SDValue CombineExtLoad(SDNode *N);
565     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
566     SDValue combineRepeatedFPDivisors(SDNode *N);
567     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
568     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
569     SDValue BuildSDIV(SDNode *N);
570     SDValue BuildSDIVPow2(SDNode *N);
571     SDValue BuildUDIV(SDNode *N);
572     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
573     SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
574     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
575     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
576     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
577     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
578                                 SDNodeFlags Flags, bool Reciprocal);
579     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
580                                 SDNodeFlags Flags, bool Reciprocal);
581     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
582                                bool DemandHighBits = true);
583     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
584     SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
585                               SDValue InnerPos, SDValue InnerNeg,
586                               unsigned PosOpcode, unsigned NegOpcode,
587                               const SDLoc &DL);
588     SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
589                               SDValue InnerPos, SDValue InnerNeg,
590                               unsigned PosOpcode, unsigned NegOpcode,
591                               const SDLoc &DL);
592     SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
593     SDValue MatchLoadCombine(SDNode *N);
594     SDValue mergeTruncStores(StoreSDNode *N);
595     SDValue ReduceLoadWidth(SDNode *N);
596     SDValue ReduceLoadOpStoreWidth(SDNode *N);
597     SDValue splitMergedValStore(StoreSDNode *ST);
598     SDValue TransformFPLoadStorePair(SDNode *N);
599     SDValue convertBuildVecZextToZext(SDNode *N);
600     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
601     SDValue reduceBuildVecTruncToBitCast(SDNode *N);
602     SDValue reduceBuildVecToShuffle(SDNode *N);
603     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
604                                   ArrayRef<int> VectorMask, SDValue VecIn1,
605                                   SDValue VecIn2, unsigned LeftIdx,
606                                   bool DidSplitVec);
607     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
608 
609     /// Walk up chain skipping non-aliasing memory nodes,
610     /// looking for aliasing nodes and adding them to the Aliases vector.
611     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
612                           SmallVectorImpl<SDValue> &Aliases);
613 
614     /// Return true if there is any possibility that the two addresses overlap.
615     bool isAlias(SDNode *Op0, SDNode *Op1) const;
616 
617     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
618     /// chain (aliasing node.)
619     SDValue FindBetterChain(SDNode *N, SDValue Chain);
620 
621     /// Try to replace a store and any possibly adjacent stores on
622     /// consecutive chains with better chains. Return true only if St is
623     /// replaced.
624     ///
625     /// Notice that other chains may still be replaced even if the function
626     /// returns false.
627     bool findBetterNeighborChains(StoreSDNode *St);
628 
629     // Helper for findBetterNeighborChains. Walk up store chain add additional
630     // chained stores that do not overlap and can be parallelized.
631     bool parallelizeChainedStores(StoreSDNode *St);
632 
633     /// Holds a pointer to an LSBaseSDNode as well as information on where it
634     /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // The load/store node being tracked.
      LSBaseSDNode *MemNode;

      // Byte offset of this operation from the common base pointer.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };
645 
646     // Classify the origin of a stored value.
647     enum class StoreSource { Unknown, Constant, Extract, Load };
648     StoreSource getStoreSource(SDValue StoreVal) {
649       switch (StoreVal.getOpcode()) {
650       case ISD::Constant:
651       case ISD::ConstantFP:
652         return StoreSource::Constant;
653       case ISD::EXTRACT_VECTOR_ELT:
654       case ISD::EXTRACT_SUBVECTOR:
655         return StoreSource::Extract;
656       case ISD::LOAD:
657         return StoreSource::Load;
658       default:
659         return StoreSource::Unknown;
660       }
661     }
662 
    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to a narrower memory type, which is returned in \p MemVT.
    /// \p ShAmt is assumed to be the bit offset of the narrowed access --
    /// confirm at the out-of-line definition.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N);

    /// Helper function for mergeConsecutiveStores which merges the component
    /// store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for mergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store introducing
    /// bitcasts if necessary.  \return True if a merged store was created.
    bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for mergeConsecutiveStores. Stores that
    /// potentially may be merged with St are placed in StoreNodes. On return,
    /// \p Root is a chain predecessor to all store candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for mergeConsecutiveStores. Checks if candidate stores
    /// have indirect dependency through their operands. RootNode is the
    /// predecessor to all stores calculated by getStoreMergeCandidates and is
    /// used to prune the dependency check. \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// This is a helper function for mergeConsecutiveStores. Given a list of
    /// store candidates, find the first N that are consecutive in memory.
    /// Returns 0 if there are not at least 2 consecutive stores to try merging.
    unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  int64_t ElementSizeBytes) const;

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of constant values.
    bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  unsigned NumConsecutiveStores,
                                  EVT MemVT, SDNode *Root, bool AllowVectors);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of extracted vector elements.
    /// When extracting multiple vector elements, try to store them in one
    /// vector store rather than a sequence of scalar stores.
    bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                 unsigned NumConsecutiveStores, EVT MemVT,
                                 SDNode *Root);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of loaded values.
    bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumConsecutiveStores, EVT MemVT,
                              SDNode *Root, bool AllowVectors,
                              bool IsNonTemporalStore, bool IsNonTemporalLoad);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return true if stores were merged.
    bool mergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);
758 
    /// Helper function to determine whether the target supports operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
    }
766 
  public:
    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    /// Returns the SelectionDAG this combiner operates on.
    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }
779 
780     /// This method returns true if we are running before type legalization or
781     /// if the specified VT is legal.
782     bool isTypeLegal(const EVT &VT) {
783       if (!LegalTypes) return true;
784       return TLI.isTypeLegal(VT);
785     }
786 
    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    /// Update the listed setcc users of \p OrigLoad to use \p ExtLoad with
    /// extension kind \p ExtType (declaration only; see the out-of-line
    /// definition for the exact contract).
    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };
796 
797 /// This class is a DAGUpdateListener that removes any deleted
798 /// nodes from the worklist.
799 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
800   DAGCombiner &DC;
801 
802 public:
803   explicit WorklistRemover(DAGCombiner &dc)
804     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
805 
806   void NodeDeleted(SDNode *N, SDNode *E) override {
807     DC.removeFromWorklist(N);
808   }
809 };
810 
/// DAGUpdateListener that registers newly created nodes with the combiner's
/// pruning set rather than the worklist itself (see FIXME below).
class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  //        compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};
822 
823 } // end anonymous namespace
824 
825 //===----------------------------------------------------------------------===//
826 //  TargetLowering::DAGCombinerInfo implementation
827 //===----------------------------------------------------------------------===//
828 
829 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
830   ((DAGCombiner*)DC)->AddToWorklist(N);
831 }
832 
833 SDValue TargetLowering::DAGCombinerInfo::
834 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
835   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
836 }
837 
838 SDValue TargetLowering::DAGCombinerInfo::
839 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
840   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
841 }
842 
843 SDValue TargetLowering::DAGCombinerInfo::
844 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
845   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
846 }
847 
848 bool TargetLowering::DAGCombinerInfo::
849 recursivelyDeleteUnusedNodes(SDNode *N) {
850   return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
851 }
852 
853 void TargetLowering::DAGCombinerInfo::
854 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
855   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
856 }
857 
858 //===----------------------------------------------------------------------===//
859 // Helper Functions
860 //===----------------------------------------------------------------------===//
861 
862 void DAGCombiner::deleteAndRecombine(SDNode *N) {
863   removeFromWorklist(N);
864 
865   // If the operands of this node are only used by the node, they will now be
866   // dead. Make sure to re-visit them and recursively delete dead nodes.
867   for (const SDValue &Op : N->ops())
868     // For an operand generating multiple values, one of the values may
869     // become dead allowing further simplification (e.g. split index
870     // arithmetic from an indexed load).
871     if (Op->hasOneUse() || Op->getNumValues() > 1)
872       AddToWorklist(Op.getNode());
873 
874   DAG.DeleteNode(N);
875 }
876 
877 // APInts must be the same size for most operations, this helper
878 // function zero extends the shorter of the pair so that they match.
879 // We provide an Offset so that we can create bitwidths that won't overflow.
880 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
881   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
882   LHS = LHS.zextOrSelf(Bits);
883   RHS = RHS.zextOrSelf(Bits);
884 }
885 
886 // Return true if this node is a setcc, or is a select_cc
887 // that selects between the target values used for true and false, making it
888 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
889 // the appropriate nodes based on the type of node we are checking. This
890 // simplifies life a bit for the callers.
891 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
892                                     SDValue &CC, bool MatchStrict) const {
893   if (N.getOpcode() == ISD::SETCC) {
894     LHS = N.getOperand(0);
895     RHS = N.getOperand(1);
896     CC  = N.getOperand(2);
897     return true;
898   }
899 
900   if (MatchStrict &&
901       (N.getOpcode() == ISD::STRICT_FSETCC ||
902        N.getOpcode() == ISD::STRICT_FSETCCS)) {
903     LHS = N.getOperand(1);
904     RHS = N.getOperand(2);
905     CC  = N.getOperand(3);
906     return true;
907   }
908 
909   if (N.getOpcode() != ISD::SELECT_CC ||
910       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
911       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
912     return false;
913 
914   if (TLI.getBooleanContents(N.getValueType()) ==
915       TargetLowering::UndefinedBooleanContent)
916     return false;
917 
918   LHS = N.getOperand(0);
919   RHS = N.getOperand(1);
920   CC  = N.getOperand(4);
921   return true;
922 }
923 
924 /// Return true if this is a SetCC-equivalent operation with only one use.
925 /// If this is true, it allows the users to invert the operation for free when
926 /// it is profitable to do so.
927 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
928   SDValue N0, N1, N2;
929   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
930     return true;
931   return false;
932 }
933 
934 // Returns the SDNode if it is a constant float BuildVector
935 // or constant float.
936 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
937   if (isa<ConstantFPSDNode>(N))
938     return N.getNode();
939   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
940     return N.getNode();
941   return nullptr;
942 }
943 
944 // Determines if it is a constant integer or a build vector of constant
945 // integers (and undefs).
946 // Do not permit build vector implicit truncation.
947 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
948   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
949     return !(Const->isOpaque() && NoOpaques);
950   if (N.getOpcode() != ISD::BUILD_VECTOR)
951     return false;
952   unsigned BitWidth = N.getScalarValueSizeInBits();
953   for (const SDValue &Op : N->op_values()) {
954     if (Op.isUndef())
955       continue;
956     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
957     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
958         (Const->isOpaque() && NoOpaques))
959       return false;
960   }
961   return true;
962 }
963 
964 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
965 // undef's.
966 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
967   if (V.getOpcode() != ISD::BUILD_VECTOR)
968     return false;
969   return isConstantOrConstantVector(V, NoOpaques) ||
970          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
971 }
972 
// Return true if it is safe to split the index computation of this indexed
// load: splitting must be enabled and the offset operand (operand 2) must not
// be an opaque target constant. (The original comment described the inverse.)
static bool canSplitIdx(LoadSDNode *LD) {
  return MaySplitLoadIndex &&
         (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
}
979 
bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                             const SDLoc &DL,
                                                             SDValue N0,
                                                             SDValue N1) {
  // Currently this only tries to ensure we don't undo the GEP splits done by
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
  // we check if the following transformation would be problematic:
  // (load/store (add, (add, x, offset1), offset2)) ->
  // (load/store (add, x, offset1+offset2)).

  // Only the (add (add x, c1), c2) shape is of interest.
  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
    return false;

  // If the inner add has no users besides this one, there is no sharing for
  // reassociation to break.
  if (N0.hasOneUse())
    return false;

  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  auto *C2 = dyn_cast<ConstantSDNode>(N1);
  if (!C1 || !C2)
    return false;

  const APInt &C1APIntVal = C1->getAPIntValue();
  const APInt &C2APIntVal = C2->getAPIntValue();
  // Offsets wider than 64 bits cannot be modelled by AddrMode::BaseOffs.
  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
    return false;

  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
  if (CombinedValueIntVal.getBitWidth() > 64)
    return false;
  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

  // Check every memory user of the inner add.
  for (SDNode *Node : N0->uses()) {
    auto LoadStore = dyn_cast<MemSDNode>(Node);
    if (LoadStore) {
      // Is x[offset2] already not a legal addressing mode? If so then
      // reassociating the constants breaks nothing (we test offset2 because
      // that's the one we hope to fold into the load or store).
      TargetLoweringBase::AddrMode AM;
      AM.HasBaseReg = true;
      AM.BaseOffs = C2APIntVal.getSExtValue();
      EVT VT = LoadStore->getMemoryVT();
      unsigned AS = LoadStore->getAddressSpace();
      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        continue;

      // Would x[offset1+offset2] still be a legal addressing mode?
      AM.BaseOffs = CombinedValue;
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        return true;
    }
  }

  return false;
}
1035 
1036 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1037 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1038 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1039                                                SDValue N0, SDValue N1) {
1040   EVT VT = N0.getValueType();
1041 
1042   if (N0.getOpcode() != Opc)
1043     return SDValue();
1044 
1045   if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1046     if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1047       // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1048       if (SDValue OpNode =
1049               DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1050         return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1051       return SDValue();
1052     }
1053     if (N0.hasOneUse()) {
1054       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1055       //              iff (op x, c1) has one use
1056       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1057       if (!OpNode.getNode())
1058         return SDValue();
1059       return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1060     }
1061   }
1062   return SDValue();
1063 }
1064 
1065 // Try to reassociate commutative binops.
1066 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1067                                     SDValue N1, SDNodeFlags Flags) {
1068   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1069 
1070   // Floating-point reassociation is not allowed without loose FP math.
1071   if (N0.getValueType().isFloatingPoint() ||
1072       N1.getValueType().isFloatingPoint())
1073     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1074       return SDValue();
1075 
1076   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1077     return Combined;
1078   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1079     return Combined;
1080   return SDValue();
1081 }
1082 
/// Replace all NumTo results of \p N with the values in \p To, update the
/// worklist, and delete N if it became dead. Returns SDValue(N, 0) so visit
/// routines can hand the (possibly dead) original value back to their caller.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  // Each replacement must match the type of the result it replaces; null
  // entries are allowed and skipped.
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // The listener keeps the worklist consistent while RAUW may CSE-delete
  // nodes that become isomorphic to existing ones.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
1114 
/// Commit a simplification recorded in \p TLO: replace every use of TLO.Old
/// with TLO.New in the DAG and keep the worklist in sync.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklistWithUsers(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
1137 
1138 /// Check the specified integer node value to see if it can be simplified or if
1139 /// things it uses can be simplified by bit propagation. If so, return true.
1140 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1141                                        const APInt &DemandedElts,
1142                                        bool AssumeSingleUse) {
1143   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1144   KnownBits Known;
1145   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1146                                 AssumeSingleUse))
1147     return false;
1148 
1149   // Revisit the node.
1150   AddToWorklist(Op.getNode());
1151 
1152   CommitTargetLoweringOpt(TLO);
1153   return true;
1154 }
1155 
1156 /// Check the specified vector node value to see if it can be simplified or
1157 /// if things it uses can be simplified as it only uses some of the elements.
1158 /// If so, return true.
1159 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1160                                              const APInt &DemandedElts,
1161                                              bool AssumeSingleUse) {
1162   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1163   APInt KnownUndef, KnownZero;
1164   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1165                                       TLO, 0, AssumeSingleUse))
1166     return false;
1167 
1168   // Revisit the node.
1169   AddToWorklist(Op.getNode());
1170 
1171   CommitTargetLoweringOpt(TLO);
1172   return true;
1173 }
1174 
/// Replace \p Load with \p ExtLoad (a wider, extending load): existing users
/// of the value result are rewired through a truncate back to the original
/// type, and chain users are moved to the new load's chain.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  // Narrow the promoted value back to the original type for current users.
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Result 0 is the loaded value, result 1 is the chain.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
1188 
/// Return a copy of \p Op extended to the wider type \p PVT, or a null
/// SDValue if no suitable extension can be built. \p Replace is set to true
/// when the result is an extending load meant to replace the original load.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes an any-extending load of the wider type;
    // an extending load keeps its original extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Keep the assertion, applied to the sign-extended promoted value.
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    // Byte-sized constants are sign extended, sub-byte ones (e.g. i1) zero
    // extended. NOTE(review): either extension is correct for a constant;
    // presumably this split gives better code on average -- confirm.
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  // Fall back to an any-extend if the target supports it at the wider type.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1225 
/// Promote \p Op to type \p PVT so that the low bits carry the sign-extended
/// original value, or return a null SDValue on failure.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  // The result is produced with a sign_extend_inreg below, so bail out early
  // if that operation is not legal at the promoted type.
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // If the promoted operand is an extending load, retire the original load.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  // Re-establish the narrow value's sign bits within the wide register.
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}
1242 
1243 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1244   EVT OldVT = Op.getValueType();
1245   SDLoc DL(Op);
1246   bool Replace = false;
1247   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1248   if (!NewOp.getNode())
1249     return SDValue();
1250   AddToWorklist(NewOp.getNode());
1251 
1252   if (Replace)
1253     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1254   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1255 }
1256 
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer operations are promoted here.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Build the wide op and truncate back to the original type.
    // NOTE(review): PromoteOperand may return a null SDValue when no legal
    // extension exists; this code assumes the target only requests promotion
    // when the operands can be promoted -- confirm.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need additional
    // replacements if there are additional uses.
    // Note: We are checking uses of the *nodes* (SDNode) rather than values
    //       (SDValue) here because the node may reference multiple values
    //       (for example, the chain value of a load node).
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}
1324 
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer shifts are promoted here.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    // Only the shifted value is widened; the shift amount stays as-is. The
    // extension must match the shift kind so the bits shifted in are right:
    // sra needs sign bits, srl needs zero bits, shl can take anything.
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1376 
1377 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1378   if (!LegalOperations)
1379     return SDValue();
1380 
1381   EVT VT = Op.getValueType();
1382   if (VT.isVector() || !VT.isInteger())
1383     return SDValue();
1384 
1385   // If operation type is 'undesirable', e.g. i16 on x86, consider
1386   // promoting it.
1387   unsigned Opc = Op.getOpcode();
1388   if (TLI.isTypeDesirableForOp(Opc, VT))
1389     return SDValue();
1390 
1391   EVT PVT = VT;
1392   // Consult target whether it is a good idea to promote this operation and
1393   // what's the right type to promote it to.
1394   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1395     assert(PVT != VT && "Don't know what type to promote to!");
1396     // fold (aext (aext x)) -> (aext x)
1397     // fold (aext (zext x)) -> (zext x)
1398     // fold (aext (sext x)) -> (sext x)
1399     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1400     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1401   }
1402   return SDValue();
1403 }
1404 
/// Promote an unindexed integer load whose type the target marks as
/// undesirable: replace it with a wider extending load plus a truncate.
/// \return true if a replacement was made.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  // Only scalar integer loads are promoted here.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes an any-extending load of the wider type;
    // an extending load keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    // Existing users see a truncate of the wide value back to VT.
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    // Rewire both results: the value through the truncate, the chain from
    // the new load.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1450 
1451 /// Recursively delete a node which has no uses and any operands for
1452 /// which it is the only use.
1453 ///
1454 /// Note that this both deletes the nodes and removes them from the worklist.
1455 /// It also adds any nodes who have had a user deleted to the worklist as they
1456 /// may now have only one use and subject to other combines.
1457 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1458   if (!N->use_empty())
1459     return false;
1460 
1461   SmallSetVector<SDNode *, 16> Nodes;
1462   Nodes.insert(N);
1463   do {
1464     N = Nodes.pop_back_val();
1465     if (!N)
1466       continue;
1467 
1468     if (N->use_empty()) {
1469       for (const SDValue &ChildN : N->op_values())
1470         Nodes.insert(ChildN.getNode());
1471 
1472       removeFromWorklist(N);
1473       DAG.DeleteNode(N);
1474     } else {
1475       AddToWorklist(N);
1476     }
1477   } while (!Nodes.empty());
1478   return true;
1479 }
1480 
1481 //===----------------------------------------------------------------------===//
1482 //  Main DAG Combiner implementation
1483 //===----------------------------------------------------------------------===//
1484 
/// Top-level driver for one combine pass: seed the worklist with every node
/// in the DAG, then repeatedly pull nodes off and try to combine each one
/// until the worklist is empty.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalDAG = Level >= AfterLegalizeDAG;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  WorklistInserter AddNodes(*this);

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (LegalDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes)
        AddToWorklistWithUsers(LN);

      // If legalization invalidated N itself, its replacements were queued
      // above; move on to the next worklist entry.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // A null return means no combine fired; leave the node alone.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    // Transfer all of N's uses to the replacement. When the value counts
    // differ, the assert pins down that N produced a single result matching
    // RV's type.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist.  Omit this if the
    // new node is the EntryToken (e.g. if a store managed to get optimized
    // out), because re-visiting the EntryToken and its users will not uncover
    // any additional opportunities, but there may be a large number of such
    // users, potentially causing compile time explosion.
    if (RV.getOpcode() != ISD::EntryToken) {
      AddToWorklist(RV.getNode());
      AddUsersToWorklist(RV.getNode());
    }

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1585 
/// Dispatch \p N to the opcode-specific visit routine. Returns the
/// replacement value chosen by that routine, or a null SDValue when no
/// generic combine applies to this opcode.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT:            return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT:            return visitSUBSAT(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SADDO:
  case ISD::UADDO:              return visitADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::SSUBO:
  case ISD::USUBO:              return visitSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SADDO_CARRY:        return visitSADDO_CARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::SSUBO_CARRY:        return visitSSUBO_CARRY(N);
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:         return visitMULFIX(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:
  case ISD::UMULO:              return visitMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR:               return visitFunnelShift(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::AssertAlign:        return visitAssertAlign(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::FPOW:               return visitFPOW(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FMINIMUM:           return visitFMINIMUM(N);
  case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  case ISD::FREEZE:             return visitFREEZE(N);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
  }
  // No visit routine for this opcode: report "no change".
  return SDValue();
}
1724 
1725 SDValue DAGCombiner::combine(SDNode *N) {
1726   SDValue RV;
1727   if (!DisableGenericCombines)
1728     RV = visit(N);
1729 
1730   // If nothing happened, try a target-specific DAG combine.
1731   if (!RV.getNode()) {
1732     assert(N->getOpcode() != ISD::DELETED_NODE &&
1733            "Node was deleted but visit returned NULL!");
1734 
1735     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1736         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1737 
1738       // Expose the DAG combiner to the target combiner impls.
1739       TargetLowering::DAGCombinerInfo
1740         DagCombineInfo(DAG, Level, false, this);
1741 
1742       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1743     }
1744   }
1745 
1746   // If nothing happened still, try promoting the operation.
1747   if (!RV.getNode()) {
1748     switch (N->getOpcode()) {
1749     default: break;
1750     case ISD::ADD:
1751     case ISD::SUB:
1752     case ISD::MUL:
1753     case ISD::AND:
1754     case ISD::OR:
1755     case ISD::XOR:
1756       RV = PromoteIntBinOp(SDValue(N, 0));
1757       break;
1758     case ISD::SHL:
1759     case ISD::SRA:
1760     case ISD::SRL:
1761       RV = PromoteIntShiftOp(SDValue(N, 0));
1762       break;
1763     case ISD::SIGN_EXTEND:
1764     case ISD::ZERO_EXTEND:
1765     case ISD::ANY_EXTEND:
1766       RV = PromoteExtend(SDValue(N, 0));
1767       break;
1768     case ISD::LOAD:
1769       if (PromoteLoad(SDValue(N, 0)))
1770         RV = SDValue(N, 0);
1771       break;
1772     }
1773   }
1774 
1775   // If N is a commutative binary node, try to eliminate it if the commuted
1776   // version is already present in the DAG.
1777   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1778       N->getNumValues() == 1) {
1779     SDValue N0 = N->getOperand(0);
1780     SDValue N1 = N->getOperand(1);
1781 
1782     // Constant operands are canonicalized to RHS.
1783     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1784       SDValue Ops[] = {N1, N0};
1785       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1786                                             N->getFlags());
1787       if (CSENode)
1788         return SDValue(CSENode, 0);
1789     }
1790   }
1791 
1792   return RV;
1793 }
1794 
1795 /// Given a node, return its input chain if it has one, otherwise return a null
1796 /// sd operand.
1797 static SDValue getInputChainForNode(SDNode *N) {
1798   if (unsigned NumOps = N->getNumOperands()) {
1799     if (N->getOperand(0).getValueType() == MVT::Other)
1800       return N->getOperand(0);
1801     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1802       return N->getOperand(NumOps-1);
1803     for (unsigned i = 1; i < NumOps-1; ++i)
1804       if (N->getOperand(i).getValueType() == MVT::Other)
1805         return N->getOperand(i);
1806   }
1807   return SDValue();
1808 }
1809 
/// Simplify a TokenFactor: drop redundant chains, inline nested one-use
/// TokenFactors, and prune operands that are already transitively ordered
/// by another operand's chain. Returns the replacement chain, or a null
/// SDValue if nothing changed.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  // Don't simplify the token factor if the node itself has too many operands.
  if (N->getNumOperands() > TokenFactorInlineLimit)
    return SDValue();

  // If the sole user is a token factor, we should make sure we have a
  // chance to merge them together. This prevents TF chains from inhibiting
  // optimizations.
  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
    AddToWorklist(*(N->use_begin()));

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps; // Operands already collected into Ops.
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    // Limit number of nodes to inline, to avoid quadratic compile times.
    // We have to add the outstanding Token Factors to Ops, otherwise we might
    // drop Ops from the resulting Token Factors.
    if (Ops.size() > TokenFactorInlineLimit) {
      for (unsigned j = i; j < TFs.size(); j++)
        Ops.emplace_back(TFs[j], 0);
      // Drop unprocessed Token Factors from TFs, so we do not add them to the
      // combiner worklist later.
      TFs.resize(i);
      break;
    }

    SDNode *TF = TFs[i];
    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Re-visit inlined Token Factors, to clean them up in case they have been
  // removed. Skip the first Token Factor, as this is the current node.
  for (unsigned i = 1, e = TFs.size(); i < e; i++)
    AddToWorklist(TFs[i]);

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains; // Chain nodes already visited.
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // NOTE: this lambda intentionally shadows the DAGCombiner::AddToWorklist
  // member within the chain-walk below; it queues chain nodes for the local
  // search, not for the combiner.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // Walk the chains upward, capped at 1024 steps to bound compile time.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops to prune.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting
      // another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END:
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the ops that were not reached while walking another op's
        // chain; the dropped ones are transitively ordered already.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
      } else {
        Result = DAG.getTokenFactor(SDLoc(N), Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
1997 
1998 /// MERGE_VALUES can always be eliminated.
1999 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2000   WorklistRemover DeadNodes(*this);
2001   // Replacing results may cause a different MERGE_VALUES to suddenly
2002   // be CSE'd with N, and carry its uses with it. Iterate until no
2003   // uses remain, to ensure that the node can be safely deleted.
2004   // First add the users of this node to the work list so that they
2005   // can be tried again once they have new operands.
2006   AddUsersToWorklist(N);
2007   do {
2008     // Do as a single replacement to avoid rewalking use lists.
2009     SmallVector<SDValue, 8> Ops;
2010     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2011       Ops.push_back(N->getOperand(i));
2012     DAG.ReplaceAllUsesWith(N, Ops.data());
2013   } while (!N->use_empty());
2014   deleteAndRecombine(N);
2015   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2016 }
2017 
2018 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2019 /// ConstantSDNode pointer else nullptr.
2020 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2021   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2022   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2023 }
2024 
/// Try to pull a binop through a one-use select-of-constants operand:
///   binop (select Cond, CT, CF), CBO
///     --> select Cond, (binop CT, CBO), (binop CF, CBO)
/// The binop disappears when both new arms constant-fold; for and/or with
/// 0/-1 arms, a non-constant other operand is also allowed.
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
         "Unexpected binary operator");

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  // Find which operand of BO is the one-use select (SelOpNo records its
  // position so the operand order can be preserved below).
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  // Both select arms must be constants (integer or FP).
  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      !isConstantFPBuildVectorOrConstantFP(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      !isConstantFPBuildVectorOrConstantFP(CF))
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  auto BinOpcode = BO->getOpcode();
  bool CanFoldNonConst =
      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
      (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
      (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));

  // The other operand of BO must be constant too, unless the and/or
  // exception above applies.
  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      !isConstantFPBuildVectorOrConstantFP(CBO))
    return SDValue();

  EVT VT = Sel.getValueType();

  // In case of shift value and shift amount may have different VT. For instance
  // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
  // swapped operands and value types do not match. NB: x86 is fine if operands
  // are not swapped with shift amount VT being not bigger than shifted value.
  // TODO: that is possible to check for a shift operation, correct VTs and
  // still perform optimization on x86 if needed.
  if (SelOpNo && VT != CBO.getValueType())
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  SDLoc DL(Sel);
  // Preserve the original operand order of BO when building the new arms.
  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
  // Unless the and/or exception applies, require the arm to have folded to a
  // constant (or undef), otherwise the transform would not eliminate the binop.
  if (!CanFoldNonConst && !NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCT))
    return SDValue();

  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
  if (!CanFoldNonConst && !NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCF))
    return SDValue();

  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
  // Carry the binop's flags over to the new select.
  SelectOp->setFlags(BO->getFlags());
  return SelectOp;
}
2102 
2103 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2104   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2105          "Expecting add or sub");
2106 
2107   // Match a constant operand and a zext operand for the math instruction:
2108   // add Z, C
2109   // sub C, Z
2110   bool IsAdd = N->getOpcode() == ISD::ADD;
2111   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2112   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2113   auto *CN = dyn_cast<ConstantSDNode>(C);
2114   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2115     return SDValue();
2116 
2117   // Match the zext operand as a setcc of a boolean.
2118   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2119       Z.getOperand(0).getValueType() != MVT::i1)
2120     return SDValue();
2121 
2122   // Match the compare as: setcc (X & 1), 0, eq.
2123   SDValue SetCC = Z.getOperand(0);
2124   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2125   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2126       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2127       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2128     return SDValue();
2129 
2130   // We are adding/subtracting a constant and an inverted low bit. Turn that
2131   // into a subtract/add of the low bit with incremented/decremented constant:
2132   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2133   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2134   EVT VT = C.getValueType();
2135   SDLoc DL(N);
2136   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2137   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2138                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2139   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2140 }
2141 
2142 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2143 /// a shift and add with a different constant.
2144 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2145   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2146          "Expecting add or sub");
2147 
2148   // We need a constant operand for the add/sub, and the other operand is a
2149   // logical shift right: add (srl), C or sub C, (srl).
2150   bool IsAdd = N->getOpcode() == ISD::ADD;
2151   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2152   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2153   if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2154       ShiftOp.getOpcode() != ISD::SRL)
2155     return SDValue();
2156 
2157   // The shift must be of a 'not' value.
2158   SDValue Not = ShiftOp.getOperand(0);
2159   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2160     return SDValue();
2161 
2162   // The shift must be moving the sign bit to the least-significant-bit.
2163   EVT VT = ShiftOp.getValueType();
2164   SDValue ShAmt = ShiftOp.getOperand(1);
2165   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2166   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2167     return SDValue();
2168 
2169   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2170   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2171   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2172   SDLoc DL(N);
2173   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2174   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2175   if (SDValue NewC =
2176           DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2177                                      {ConstantOp, DAG.getConstant(1, DL, VT)}))
2178     return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2179   return SDValue();
2180 }
2181 
/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
/// are no common bits set in the operands).
///
/// Returns the replacement value, or a null SDValue if no fold applies. The
/// folds are attempted in a fixed order; earlier (simpler/cheaper) folds take
/// precedence over later ones.
SDValue DAGCombiner::visitADDLike(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((A-c1)+c2) -> (A+(c2-c1))
    // The constant fold cannot fail here: both operands were just verified
    // to be non-opaque constants (or splats thereof).
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
      SDValue Sub =
          DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
      assert(Sub && "Constant folding failed");
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
    }

    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      SDValue Add =
          DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
      assert(Add && "Constant folding failed");
      return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneOrOneSplat(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
    // equivalent to (add x, c0).
    if (N0.getOpcode() == ISD::OR &&
        isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
                                                    {N1, N0.getOperand(1)}))
        return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
    if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
      return RADD;
  }
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold ((A-B)+(C-A)) -> (C-B)
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0.getOperand(0) == N1.getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N0.getOperand(1));

  // fold ((A-B)+(B-C)) -> (A-C)
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0.getOperand(1) == N1.getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N1.getOperand(1));

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // fold (add (umax X, C), -C) --> (usubsat X, C)
  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
    // Match when the umax constant is the negation of the add constant;
    // undef lanes are allowed to match each other.
    auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
      return (!Max && !Op) ||
             (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
    };
    if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
                                  /*AllowUndefs*/ true))
      return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
                         N0.getOperand(1));
  }

  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (isOneOrOneSplat(N1)) {
    // fold (add (xor a, -1), 1) -> (sub 0, a)
    if (isBitwiseNot(N0))
      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                         N0.getOperand(0));

    // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
    if (N0.getOpcode() == ISD::ADD ||
        N0.getOpcode() == ISD::UADDO ||
        N0.getOpcode() == ISD::SADDO) {
      // A and Xor start out null; Xor is only set when one of the inner
      // operands is a bitwise-not, so 'if (Xor)' below gates the fold.
      SDValue A, Xor;

      if (isBitwiseNot(N0.getOperand(0))) {
        A = N0.getOperand(1);
        Xor = N0.getOperand(0);
      } else if (isBitwiseNot(N0.getOperand(1))) {
        A = N0.getOperand(0);
        Xor = N0.getOperand(1);
      }

      if (Xor)
        return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
    }

    // Look for:
    //   add (add x, y), 1
    // And if the target does not like this form then turn into:
    //   sub y, (xor x, -1)
    if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
        N0.getOpcode() == ISD::ADD) {
      SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                                DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
    }
  }

  // (x - y) + -1  ->  add (xor y, -1), x
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isAllOnesOrAllOnesSplat(N1)) {
    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
  }

  // Try the commutative folds with the operands in both orders.
  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
    return Combined;

  return SDValue();
}
2402 
/// Combine an ISD::ADD node. Delegates the generic add-like folds to
/// visitADDLike and then applies ADD-specific folds (sign-bit tricks,
/// add-to-or, and vscale reassociation). Returns a null SDValue if no fold
/// applies.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  if (SDValue Combined = visitADDLike(N))
    return Combined;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
    APInt C0 = N0->getConstantOperandAPInt(0);
    APInt C1 = N1->getConstantOperandAPInt(0);
    return DAG.getVScale(DL, VT, C0 + C1);
  }

  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
  if ((N0.getOpcode() == ISD::ADD) &&
      (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
      (N1.getOpcode() == ISD::VSCALE)) {
    auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
    auto VS1 = N1->getConstantOperandAPInt(0);
    auto VS = DAG.getVScale(DL, VT, VS0 + VS1);
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
  }

  return SDValue();
}
2442 
/// Combine a saturating add node (ISD::SADDSAT or ISD::UADDSAT; the opcode is
/// read from N, so the same folds serve both). Returns a null SDValue if no
/// fold applies.
SDValue DAGCombiner::visitADDSAT(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    // TODO SimplifyVBinOp

    // fold (add_sat x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add_sat x, undef) -> -1
  if (N0.isUndef() || N1.isUndef())
    return DAG.getAllOnesConstant(DL, VT);

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(Opcode, DL, VT, N1, N0);
    // fold (add_sat c1, c2) -> c3
    return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
  }

  // fold (add_sat x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // If it cannot overflow, transform into an add.
  // (Only valid for the unsigned form; signed saturation clamps at both
  // ends and overflow analysis here is unsigned.)
  if (Opcode == ISD::UADDSAT)
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADD, DL, VT, N0, N1);

  return SDValue();
}
2484 
2485 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2486   bool Masked = false;
2487 
2488   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2489   while (true) {
2490     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2491       V = V.getOperand(0);
2492       continue;
2493     }
2494 
2495     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2496       Masked = true;
2497       V = V.getOperand(0);
2498       continue;
2499     }
2500 
2501     break;
2502   }
2503 
2504   // If this is not a carry, return.
2505   if (V.getResNo() != 1)
2506     return SDValue();
2507 
2508   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2509       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2510     return SDValue();
2511 
2512   EVT VT = V.getNode()->getValueType(0);
2513   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2514     return SDValue();
2515 
2516   // If the result is masked, then no matter what kind of bool it is we can
2517   // return. If it isn't, then we need to make sure the bool type is either 0 or
2518   // 1 and not other values.
2519   if (Masked ||
2520       TLI.getBooleanContents(V.getValueType()) ==
2521           TargetLoweringBase::ZeroOrOneBooleanContent)
2522     return V;
2523 
2524   return SDValue();
2525 }
2526 
2527 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2528 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2529 /// the opcode and bypass the mask operation.
2530 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2531                                  SelectionDAG &DAG, const SDLoc &DL) {
2532   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2533     return SDValue();
2534 
2535   EVT VT = N0.getValueType();
2536   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2537     return SDValue();
2538 
2539   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2540   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2541   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2542 }
2543 
/// Helper for doing combines based on N0 and N1 being added to each other.
///
/// Called twice by visitADDLike (once per operand order), so each fold here
/// only needs to match one orientation of the pattern. LocReference supplies
/// the debug location for any new nodes. Returns a null SDValue if no fold
/// applies.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
                                          SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
    return V;

  // Look for:
  //   add (add x, 1), y
  // And if the target does not like this form then turn into:
  //   sub y, (xor x, -1)
  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
      N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
    SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                              DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
  }

  // Hoist one-use subtraction by non-opaque constant:
  //   (x - C) + y  ->  (x + y) - C
  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
  }
  // Hoist one-use subtraction from non-opaque constant:
  //   (C - x) + y  ->  (y - x) + C
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
  }

  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
  // rather than 'add 0/-1' (the zext should get folded).
  // add (sext i1 Y), X --> sub X, (zext i1 Y)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
      TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}
2623 
2624 SDValue DAGCombiner::visitADDC(SDNode *N) {
2625   SDValue N0 = N->getOperand(0);
2626   SDValue N1 = N->getOperand(1);
2627   EVT VT = N0.getValueType();
2628   SDLoc DL(N);
2629 
2630   // If the flag result is dead, turn this into an ADD.
2631   if (!N->hasAnyUseOfValue(1))
2632     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2633                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2634 
2635   // canonicalize constant to RHS.
2636   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2637   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2638   if (N0C && !N1C)
2639     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2640 
2641   // fold (addc x, 0) -> x + no carry out
2642   if (isNullConstant(N1))
2643     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2644                                         DL, MVT::Glue));
2645 
2646   // If it cannot overflow, transform into an add.
2647   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2648     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2649                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2650 
2651   return SDValue();
2652 }
2653 
2654 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2655                            SelectionDAG &DAG, const TargetLowering &TLI) {
2656   EVT VT = V.getValueType();
2657 
2658   SDValue Cst;
2659   switch (TLI.getBooleanContents(VT)) {
2660   case TargetLowering::ZeroOrOneBooleanContent:
2661   case TargetLowering::UndefinedBooleanContent:
2662     Cst = DAG.getConstant(1, DL, VT);
2663     break;
2664   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2665     Cst = DAG.getAllOnesConstant(DL, VT);
2666     break;
2667   }
2668 
2669   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2670 }
2671 
2672 /**
2673  * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2674  * then the flip also occurs if computing the inverse is the same cost.
2675  * This function returns an empty SDValue in case it cannot flip the boolean
2676  * without increasing the cost of the computation. If you want to flip a boolean
2677  * no matter what, use flipBoolean.
2678  */
2679 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2680                                   const TargetLowering &TLI,
2681                                   bool Force) {
2682   if (Force && isa<ConstantSDNode>(V))
2683     return flipBoolean(V, SDLoc(V), DAG, TLI);
2684 
2685   if (V.getOpcode() != ISD::XOR)
2686     return SDValue();
2687 
2688   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2689   if (!Const)
2690     return SDValue();
2691 
2692   EVT VT = V.getValueType();
2693 
2694   bool IsFlip = false;
2695   switch(TLI.getBooleanContents(VT)) {
2696     case TargetLowering::ZeroOrOneBooleanContent:
2697       IsFlip = Const->isOne();
2698       break;
2699     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2700       IsFlip = Const->isAllOnesValue();
2701       break;
2702     case TargetLowering::UndefinedBooleanContent:
2703       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2704       break;
2705   }
2706 
2707   if (IsFlip)
2708     return V.getOperand(0);
2709   if (Force)
2710     return flipBoolean(V, SDLoc(V), DAG, TLI);
2711   return SDValue();
2712 }
2713 
/// Combine an overflow-producing add node (ISD::SADDO or ISD::UADDO). Result
/// value #0 is the sum and #1 the overflow flag. Returns a null SDValue if no
/// fold applies.
SDValue DAGCombiner::visitADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SADDO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);

  // fold (addo x, 0) -> x + no carry out
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // The remaining folds rely on unsigned overflow reasoning, so they only
  // apply to UADDO.
  if (!IsSigned) {
    // If it cannot overflow, transform into an add.
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
      return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                       DAG.getConstant(0, DL, CarryVT));

    // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
    if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
      SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                                DAG.getConstant(0, DL, VT), N0.getOperand(0));
      return CombineTo(N, Sub,
                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
    }

    // Try the commutative folds with the operands in both orders.
    if (SDValue Combined = visitUADDOLike(N0, N1, N))
      return Combined;

    if (SDValue Combined = visitUADDOLike(N1, N0, N))
      return Combined;
  }

  return SDValue();
}
2760 
/// Commutative-helper folds for ISD::UADDO; called by visitADDO with operands
/// in both orders. Scalar types only. Returns a null SDValue if no fold
/// applies.
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  // If Y + 1 cannot overflow.
  // (The +1 accounts for the worst case of the incoming Carry being set.)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
    SDValue Y = N1.getOperand(0);
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
    if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
                         N1.getOperand(2));
  }

  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), VT), Carry);

  return SDValue();
}
2784 
2785 SDValue DAGCombiner::visitADDE(SDNode *N) {
2786   SDValue N0 = N->getOperand(0);
2787   SDValue N1 = N->getOperand(1);
2788   SDValue CarryIn = N->getOperand(2);
2789 
2790   // canonicalize constant to RHS
2791   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2792   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2793   if (N0C && !N1C)
2794     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2795                        N1, N0, CarryIn);
2796 
2797   // fold (adde x, y, false) -> (addc x, y)
2798   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2799     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2800 
2801   return SDValue();
2802 }
2803 
/// Combine an ISD::ADDCARRY node (add with boolean carry-in and carry-out).
/// Returns a null SDValue if no fold applies.
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
  }

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  // The sum is just the carry-in normalized to 0/1 in the result type.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    EVT CarryVT = CarryIn.getValueType();
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  // Try the commutative folds with the operands in both orders.
  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}
2842 
2843 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2844   SDValue N0 = N->getOperand(0);
2845   SDValue N1 = N->getOperand(1);
2846   SDValue CarryIn = N->getOperand(2);
2847   SDLoc DL(N);
2848 
2849   // canonicalize constant to RHS
2850   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2851   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2852   if (N0C && !N1C)
2853     return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2854 
2855   // fold (saddo_carry x, y, false) -> (saddo x, y)
2856   if (isNullConstant(CarryIn)) {
2857     if (!LegalOperations ||
2858         TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2859       return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2860   }
2861 
2862   return SDValue();
2863 }
2864 
2865 /**
2866  * If we are facing some sort of diamond carry propapagtion pattern try to
2867  * break it up to generate something like:
2868  *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2869  *
2870  * The end result is usually an increase in operation required, but because the
2871  * carry is now linearized, other tranforms can kick in and optimize the DAG.
2872  *
2873  * Patterns typically look something like
2874  *            (uaddo A, B)
2875  *             /       \
2876  *          Carry      Sum
2877  *            |          \
2878  *            | (addcarry *, 0, Z)
2879  *            |       /
2880  *             \   Carry
2881  *              |   /
2882  * (addcarry X, *, *)
2883  *
2884  * But numerous variation exist. Our goal is to identify A, B, X and Z and
2885  * produce a combine with a single path for carry propagation.
2886  */
2887 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2888                                       SDValue X, SDValue Carry0, SDValue Carry1,
2889                                       SDNode *N) {
2890   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2891     return SDValue();
2892   if (Carry1.getOpcode() != ISD::UADDO)
2893     return SDValue();
2894 
2895   SDValue Z;
2896 
2897   /**
2898    * First look for a suitable Z. It will present itself in the form of
2899    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2900    */
2901   if (Carry0.getOpcode() == ISD::ADDCARRY &&
2902       isNullConstant(Carry0.getOperand(1))) {
2903     Z = Carry0.getOperand(2);
2904   } else if (Carry0.getOpcode() == ISD::UADDO &&
2905              isOneConstant(Carry0.getOperand(1))) {
2906     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2907     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2908   } else {
2909     // We couldn't find a suitable Z.
2910     return SDValue();
2911   }
2912 
2913 
2914   auto cancelDiamond = [&](SDValue A,SDValue B) {
2915     SDLoc DL(N);
2916     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2917     Combiner.AddToWorklist(NewY.getNode());
2918     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2919                        DAG.getConstant(0, DL, X.getValueType()),
2920                        NewY.getValue(1));
2921   };
2922 
2923   /**
2924    *      (uaddo A, B)
2925    *           |
2926    *          Sum
2927    *           |
2928    * (addcarry *, 0, Z)
2929    */
2930   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2931     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2932   }
2933 
2934   /**
2935    * (addcarry A, 0, Z)
2936    *         |
2937    *        Sum
2938    *         |
2939    *  (uaddo *, B)
2940    */
2941   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2942     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2943   }
2944 
2945   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2946     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2947   }
2948 
2949   return SDValue();
2950 }
2951 
// If we are facing some sort of diamond carry/borrow in/out pattern try to
// match patterns like:
//
//          (uaddo A, B)            CarryIn
//            |  \                     |
//            |   \                    |
//    PartialSum   PartialCarryOutX   /
//            |        |             /
//            |    ____|____________/
//            |   /    |
//     (uaddo *, *)    \________
//       |  \                   \
//       |   \                   |
//       |    PartialCarryOutY   |
//       |        \              |
//       |         \            /
//   AddCarrySum    |    ______/
//                  |   /
//   CarryOut = (or *, *)
//
// And generate ADDCARRY (or SUBCARRY) with two result values:
//
//    {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
//
// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
// a single path for carry/borrow out propagation:
static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
                                   const TargetLowering &TLI, SDValue Carry0,
                                   SDValue Carry1, SDNode *N) {
  // Carry0 and Carry1 must both be the overflow (second) result of a
  // UADDO/USUBO node of matching opcode; N is the OR/XOR/AND node that
  // merges the two partial carry bits.
  if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
    return SDValue();
  unsigned Opcode = Carry0.getOpcode();
  if (Opcode != Carry1.getOpcode())
    return SDValue();
  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
    return SDValue();

  // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
  // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
  // the above ASCII art.)
  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
      Carry1.getOperand(1) != Carry0.getValue(0))
    std::swap(Carry0, Carry1);
  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
      Carry1.getOperand(1) != Carry0.getValue(0))
    return SDValue();

  // The carry in value must be on the righthand side for subtraction.
  unsigned CarryInOperandNum =
      Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
    return SDValue();
  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);

  unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
    return SDValue();

  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
  // TODO: make getAsCarry() aware of how partial carries are merged.
  if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();
  CarryIn = CarryIn.getOperand(0);
  if (CarryIn.getValueType() != MVT::i1)
    return SDValue();

  SDLoc DL(N);
  // Build the merged node; it reuses Carry1's VT list so the sum result and
  // the carry-out result keep their original types.
  SDValue Merged =
      DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
                  Carry0.getOperand(1), CarryIn);

  // Please note that because we have proven that the result of the UADDO/USUBO
  // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
  // therefore prove that if the first UADDO/USUBO overflows, the second
  // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
  // maximum value.
  //
  //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
  //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
  //
  // This is important because it means that OR and XOR can be used to merge
  // carry flags; and that AND can return a constant zero.
  //
  // TODO: match other operations that can merge flags (ADD, etc)
  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
  if (N->getOpcode() == ISD::AND)
    // The two partial carries can never both be set (see note above), so the
    // AND of them is constant false.
    // NOTE(review): this assumes N's result type is i1 — verify for targets
    // whose boolean carry type is wider than i1.
    return DAG.getConstant(0, DL, MVT::i1);
  return Merged.getValue(1);
}
3041 
// Shared ADDCARRY combines. N0/N1 are the value operands, CarryIn the carry
// operand, and N the ADDCARRY node itself. Returns the replacement value or
// an empty SDValue if no fold applies.
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
  if (isBitwiseNot(N0))
    if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
      SDLoc DL(N);
      SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
                                N0.getOperand(0), NotC);
      // The borrow-out of the subcarry is the inverse of the carry-out, so
      // flip it before handing both results back.
      return CombineTo(N, Sub,
                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
    }

  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
  // or the dependency between the instructions.
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
        N0.getValue(1) != CarryIn)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  // When one of the addcarry arguments is itself a carry, we may be facing
  // a diamond carry propagation. In which case we try to transform the DAG
  // to ensure linear carry propagation if that is possible.
  if (auto Y = getAsCarry(TLI, N1)) {
    // Because both are carries, Y and Z can be swapped.
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
      return R;
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
      return R;
  }

  return SDValue();
}
3080 
3081 // Since it may not be valid to emit a fold to zero for vector initializers
3082 // check if we can before folding.
3083 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3084                              SelectionDAG &DAG, bool LegalOperations) {
3085   if (!VT.isVector())
3086     return DAG.getConstant(0, DL, VT);
3087   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3088     return DAG.getConstant(0, DL, VT);
3089   return SDValue();
3090 }
3091 
// Combine/canonicalize an integer ISD::SUB node. Returns the replacement
// value or an empty SDValue if no fold applies. The folds are ordered and
// each one returns immediately, so later folds only see nodes that survived
// the earlier ones.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

  // fold (sub c1, c2) -> c3
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
    return C;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // N1 as a non-opaque scalar constant, if it is one (null otherwise).
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  if (isNullOrNullSplat(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesOrAllOnesSplat(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold (A - (0-B)) -> A+B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold (A+C1)-C2 -> A+(C1-C2)
  if (N0.getOpcode() == ISD::ADD &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue NewC =
        DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
    assert(NewC && "Constant folding failed");
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
  }

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
      assert(NewC && "Constant folding failed");
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold (A-C1)-C2 -> A-(C1+C2)
  if (N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue NewC =
        DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
    assert(NewC && "Constant folding failed");
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
  }

  // fold (c1-A)-c2 -> (c1-c2)-A
  if (N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
    SDValue NewC =
        DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
    assert(NewC && "Constant folding failed");
    return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold (A-(B-C)) -> A+(C-B)
  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
                                   N1.getOperand(0)));

  // A - (A & B)  ->  A & (~B)
  if (N1.getOpcode() == ISD::AND) {
    SDValue A = N1.getOperand(0);
    SDValue B = N1.getOperand(1);
    if (A != N0)
      std::swap(A, B);
    if (A == N0 &&
        (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
      SDValue InvB =
          DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::AND, DL, VT, A, InvB);
    }
  }

  // fold (X - (-Y * Z)) -> (X + (Y * Z))
  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
    if (N1.getOperand(0).getOpcode() == ISD::SUB &&
        isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0).getOperand(1),
                                N1.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
    if (N1.getOperand(1).getOpcode() == ISD::SUB &&
        isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0),
                                N1.getOperand(1).getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
  }

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
    return V;

  // (x - y) - 1  ->  add (xor y, -1), x
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
                              DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
  }

  // Look for:
  //   sub y, (xor x, -1)
  // And if the target does not like this form then turn into:
  //   add (add x, y), 1
  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
    return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
  }

  // Hoist one-use addition by non-opaque constant:
  //   (x + C) - y  ->  (x - y) + C
  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
  }
  // y - (x + C)  ->  (y - x) - C
  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
      isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
  }
  // (x - C) - y  ->  (x - y) - C
  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
  }
  // (C - x) - y  ->  C - (x + y)
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
  }

  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
  // rather than 'sub 0/1' (the sext should get folded).
  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
      N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
      TLI.getBooleanContents(VT) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent) {
    SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
    return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
  }

  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
      SDValue S0 = N1.getOperand(0);
      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
      }
    }
  }

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  // canonicalize (sub X, (vscale * C)) to (add X,  (vscale * -C))
  if (N1.getOpcode() == ISD::VSCALE) {
    APInt IntVal = N1.getConstantOperandAPInt(0);
    return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
  }

  // Prefer an add for more folding potential and possibly better codegen:
  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
    SDValue ShAmt = N1.getOperand(1);
    ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
    if (ShAmtC &&
        ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
      SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
      return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
    }
  }

  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
    // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
    if (SDValue Carry = getAsCarry(TLI, N0)) {
      SDValue X = N1;
      SDValue Zero = DAG.getConstant(0, DL, VT);
      SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
                         Carry);
    }
  }

  return SDValue();
}
3417 
3418 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3419   SDValue N0 = N->getOperand(0);
3420   SDValue N1 = N->getOperand(1);
3421   EVT VT = N0.getValueType();
3422   SDLoc DL(N);
3423 
3424   // fold vector ops
3425   if (VT.isVector()) {
3426     // TODO SimplifyVBinOp
3427 
3428     // fold (sub_sat x, 0) -> x, vector edition
3429     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3430       return N0;
3431   }
3432 
3433   // fold (sub_sat x, undef) -> 0
3434   if (N0.isUndef() || N1.isUndef())
3435     return DAG.getConstant(0, DL, VT);
3436 
3437   // fold (sub_sat x, x) -> 0
3438   if (N0 == N1)
3439     return DAG.getConstant(0, DL, VT);
3440 
3441   // fold (sub_sat c1, c2) -> c3
3442   if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3443     return C;
3444 
3445   // fold (sub_sat x, 0) -> x
3446   if (isNullConstant(N1))
3447     return N0;
3448 
3449   return SDValue();
3450 }
3451 
3452 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3453   SDValue N0 = N->getOperand(0);
3454   SDValue N1 = N->getOperand(1);
3455   EVT VT = N0.getValueType();
3456   SDLoc DL(N);
3457 
3458   // If the flag result is dead, turn this into an SUB.
3459   if (!N->hasAnyUseOfValue(1))
3460     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3461                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3462 
3463   // fold (subc x, x) -> 0 + no borrow
3464   if (N0 == N1)
3465     return CombineTo(N, DAG.getConstant(0, DL, VT),
3466                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3467 
3468   // fold (subc x, 0) -> x + no borrow
3469   if (isNullConstant(N1))
3470     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3471 
3472   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3473   if (isAllOnesConstant(N0))
3474     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3475                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3476 
3477   return SDValue();
3478 }
3479 
// Folds for the overflow-reporting subtracts SSUBO and USUBO.
SDValue DAGCombiner::visitSUBO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SSUBO == N->getOpcode());

  // Type of the overflow (second) result.
  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // fold (subo x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getConstant(0, DL, CarryVT));

  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (ssubo x, c) -> (saddo x, -c)
  // Only for the signed form, and excluding the minimum signed value
  // because its negation is not representable.
  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
    return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // fold (subo x, 0) -> x + no borrow
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getConstant(0, DL, CarryVT));

  return SDValue();
}
3518 
3519 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3520   SDValue N0 = N->getOperand(0);
3521   SDValue N1 = N->getOperand(1);
3522   SDValue CarryIn = N->getOperand(2);
3523 
3524   // fold (sube x, y, false) -> (subc x, y)
3525   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3526     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3527 
3528   return SDValue();
3529 }
3530 
3531 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3532   SDValue N0 = N->getOperand(0);
3533   SDValue N1 = N->getOperand(1);
3534   SDValue CarryIn = N->getOperand(2);
3535 
3536   // fold (subcarry x, y, false) -> (usubo x, y)
3537   if (isNullConstant(CarryIn)) {
3538     if (!LegalOperations ||
3539         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3540       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3541   }
3542 
3543   return SDValue();
3544 }
3545 
3546 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3547   SDValue N0 = N->getOperand(0);
3548   SDValue N1 = N->getOperand(1);
3549   SDValue CarryIn = N->getOperand(2);
3550 
3551   // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3552   if (isNullConstant(CarryIn)) {
3553     if (!LegalOperations ||
3554         TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3555       return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3556   }
3557 
3558   return SDValue();
3559 }
3560 
3561 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3562 // UMULFIXSAT here.
3563 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3564   SDValue N0 = N->getOperand(0);
3565   SDValue N1 = N->getOperand(1);
3566   SDValue Scale = N->getOperand(2);
3567   EVT VT = N0.getValueType();
3568 
3569   // fold (mulfix x, undef, scale) -> 0
3570   if (N0.isUndef() || N1.isUndef())
3571     return DAG.getConstant(0, SDLoc(N), VT);
3572 
3573   // Canonicalize constant to RHS (vector doesn't have to splat)
3574   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3575      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3576     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3577 
3578   // fold (mulfix x, 0, scale) -> 0
3579   if (isNullConstant(N1))
3580     return DAG.getConstant(0, SDLoc(N), VT);
3581 
3582   return SDValue();
3583 }
3584 
3585 SDValue DAGCombiner::visitMUL(SDNode *N) {
3586   SDValue N0 = N->getOperand(0);
3587   SDValue N1 = N->getOperand(1);
3588   EVT VT = N0.getValueType();
3589 
3590   // fold (mul x, undef) -> 0
3591   if (N0.isUndef() || N1.isUndef())
3592     return DAG.getConstant(0, SDLoc(N), VT);
3593 
3594   bool N1IsConst = false;
3595   bool N1IsOpaqueConst = false;
3596   APInt ConstValue1;
3597 
3598   // fold vector ops
3599   if (VT.isVector()) {
3600     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3601       return FoldedVOp;
3602 
3603     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3604     assert((!N1IsConst ||
3605             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3606            "Splat APInt should be element width");
3607   } else {
3608     N1IsConst = isa<ConstantSDNode>(N1);
3609     if (N1IsConst) {
3610       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3611       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3612     }
3613   }
3614 
3615   // fold (mul c1, c2) -> c1*c2
3616   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3617     return C;
3618 
3619   // canonicalize constant to RHS (vector doesn't have to splat)
3620   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3621      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3622     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3623 
3624   // fold (mul x, 0) -> 0
3625   if (N1IsConst && ConstValue1.isNullValue())
3626     return N1;
3627 
3628   // fold (mul x, 1) -> x
3629   if (N1IsConst && ConstValue1.isOneValue())
3630     return N0;
3631 
3632   if (SDValue NewSel = foldBinOpIntoSelect(N))
3633     return NewSel;
3634 
3635   // fold (mul x, -1) -> 0-x
3636   if (N1IsConst && ConstValue1.isAllOnesValue()) {
3637     SDLoc DL(N);
3638     return DAG.getNode(ISD::SUB, DL, VT,
3639                        DAG.getConstant(0, DL, VT), N0);
3640   }
3641 
3642   // fold (mul x, (1 << c)) -> x << c
3643   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3644       DAG.isKnownToBeAPowerOfTwo(N1) &&
3645       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3646     SDLoc DL(N);
3647     SDValue LogBase2 = BuildLogBase2(N1, DL);
3648     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3649     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3650     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3651   }
3652 
3653   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3654   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3655     unsigned Log2Val = (-ConstValue1).logBase2();
3656     SDLoc DL(N);
3657     // FIXME: If the input is something that is easily negated (e.g. a
3658     // single-use add), we should put the negate there.
3659     return DAG.getNode(ISD::SUB, DL, VT,
3660                        DAG.getConstant(0, DL, VT),
3661                        DAG.getNode(ISD::SHL, DL, VT, N0,
3662                             DAG.getConstant(Log2Val, DL,
3663                                       getShiftAmountTy(N0.getValueType()))));
3664   }
3665 
3666   // Try to transform:
3667   // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3668   // mul x, (2^N + 1) --> add (shl x, N), x
3669   // mul x, (2^N - 1) --> sub (shl x, N), x
3670   // Examples: x * 33 --> (x << 5) + x
3671   //           x * 15 --> (x << 4) - x
3672   //           x * -33 --> -((x << 5) + x)
3673   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3674   // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3675   // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3676   // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3677   // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3678   //           x * 0xf800 --> (x << 16) - (x << 11)
3679   //           x * -0x8800 --> -((x << 15) + (x << 11))
3680   //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3681   if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3682     // TODO: We could handle more general decomposition of any constant by
3683     //       having the target set a limit on number of ops and making a
3684     //       callback to determine that sequence (similar to sqrt expansion).
3685     unsigned MathOp = ISD::DELETED_NODE;
3686     APInt MulC = ConstValue1.abs();
3687     // The constant `2` should be treated as (2^0 + 1).
3688     unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3689     MulC.lshrInPlace(TZeros);
3690     if ((MulC - 1).isPowerOf2())
3691       MathOp = ISD::ADD;
3692     else if ((MulC + 1).isPowerOf2())
3693       MathOp = ISD::SUB;
3694 
3695     if (MathOp != ISD::DELETED_NODE) {
3696       unsigned ShAmt =
3697           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3698       ShAmt += TZeros;
3699       assert(ShAmt < VT.getScalarSizeInBits() &&
3700              "multiply-by-constant generated out of bounds shift");
3701       SDLoc DL(N);
3702       SDValue Shl =
3703           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3704       SDValue R =
3705           TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3706                                DAG.getNode(ISD::SHL, DL, VT, N0,
3707                                            DAG.getConstant(TZeros, DL, VT)))
3708                  : DAG.getNode(MathOp, DL, VT, Shl, N0);
3709       if (ConstValue1.isNegative())
3710         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3711       return R;
3712     }
3713   }
3714 
3715   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3716   if (N0.getOpcode() == ISD::SHL &&
3717       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3718       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3719     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3720     if (isConstantOrConstantVector(C3))
3721       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3722   }
3723 
3724   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3725   // use.
3726   {
3727     SDValue Sh(nullptr, 0), Y(nullptr, 0);
3728 
3729     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3730     if (N0.getOpcode() == ISD::SHL &&
3731         isConstantOrConstantVector(N0.getOperand(1)) &&
3732         N0.getNode()->hasOneUse()) {
3733       Sh = N0; Y = N1;
3734     } else if (N1.getOpcode() == ISD::SHL &&
3735                isConstantOrConstantVector(N1.getOperand(1)) &&
3736                N1.getNode()->hasOneUse()) {
3737       Sh = N1; Y = N0;
3738     }
3739 
3740     if (Sh.getNode()) {
3741       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3742       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3743     }
3744   }
3745 
3746   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3747   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3748       N0.getOpcode() == ISD::ADD &&
3749       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3750       isMulAddWithConstProfitable(N, N0, N1))
3751       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3752                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3753                                      N0.getOperand(0), N1),
3754                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3755                                      N0.getOperand(1), N1));
3756 
3757   // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3758   if (N0.getOpcode() == ISD::VSCALE)
3759     if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3760       APInt C0 = N0.getConstantOperandAPInt(0);
3761       APInt C1 = NC1->getAPIntValue();
3762       return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3763     }
3764 
3765   // Fold ((mul x, 0/undef) -> 0,
3766   //       (mul x, 1) -> x) -> x)
3767   // -> and(x, mask)
3768   // We can replace vectors with '0' and '1' factors with a clearing mask.
3769   if (VT.isFixedLengthVector()) {
3770     unsigned NumElts = VT.getVectorNumElements();
3771     SmallBitVector ClearMask;
3772     ClearMask.reserve(NumElts);
3773     auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3774       if (!V || V->isNullValue()) {
3775         ClearMask.push_back(true);
3776         return true;
3777       }
3778       ClearMask.push_back(false);
3779       return V->isOne();
3780     };
3781     if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3782         ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
3783       assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
3784       SDLoc DL(N);
3785       EVT LegalSVT = N1.getOperand(0).getValueType();
3786       SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3787       SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
3788       SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3789       for (unsigned I = 0; I != NumElts; ++I)
3790         if (ClearMask[I])
3791           Mask[I] = Zero;
3792       return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
3793     }
3794   }
3795 
3796   // reassociate mul
3797   if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3798     return RMUL;
3799 
3800   return SDValue();
3801 }
3802 
3803 /// Return true if divmod libcall is available.
3804 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3805                                      const TargetLowering &TLI) {
3806   RTLIB::Libcall LC;
3807   EVT NodeType = Node->getValueType(0);
3808   if (!NodeType.isSimple())
3809     return false;
3810   switch (NodeType.getSimpleVT().SimpleTy) {
3811   default: return false; // No libcall for vector types.
3812   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
3813   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3814   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3815   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3816   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3817   }
3818 
3819   return TLI.getLibcallName(LC) != nullptr;
3820 }
3821 
/// Issue divrem if both quotient and remainder are needed.
///
/// Given a DIV or REM node, look for sibling nodes computing the matching
/// REM/DIV (or an existing DIVREM) of the same operands and fold them all into
/// a single [SU]DIVREM node. Returns the DIVREM value on success, or an empty
/// SDValue if no combine was performed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Walk all users of the dividend looking for div/rem nodes with the same
  // operand pair; they can all share one DIVREM node.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
        User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        if (UserOpc == OtherOpcode) {
          // Create the shared DIVREM node lazily, on first match.
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          // Reuse an already-existing DIVREM of the same operands.
          combined = SDValue(User, 0);
        } else {
          // A duplicate of Node itself; it will be rewritten when Node is
          // (by our caller via the returned value).
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Result 0 of DIVREM is the quotient, result 1 is the remainder.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
3892 
3893 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3894   SDValue N0 = N->getOperand(0);
3895   SDValue N1 = N->getOperand(1);
3896   EVT VT = N->getValueType(0);
3897   SDLoc DL(N);
3898 
3899   unsigned Opc = N->getOpcode();
3900   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3901   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3902 
3903   // X / undef -> undef
3904   // X % undef -> undef
3905   // X / 0 -> undef
3906   // X % 0 -> undef
3907   // NOTE: This includes vectors where any divisor element is zero/undef.
3908   if (DAG.isUndef(Opc, {N0, N1}))
3909     return DAG.getUNDEF(VT);
3910 
3911   // undef / X -> 0
3912   // undef % X -> 0
3913   if (N0.isUndef())
3914     return DAG.getConstant(0, DL, VT);
3915 
3916   // 0 / X -> 0
3917   // 0 % X -> 0
3918   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3919   if (N0C && N0C->isNullValue())
3920     return N0;
3921 
3922   // X / X -> 1
3923   // X % X -> 0
3924   if (N0 == N1)
3925     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3926 
3927   // X / 1 -> X
3928   // X % 1 -> 0
3929   // If this is a boolean op (single-bit element type), we can't have
3930   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3931   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3932   // it's a 1.
3933   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3934     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3935 
3936   return SDValue();
3937 }
3938 
/// Combine an ISD::SDIV node: constant folds, special divisors (-1,
/// MIN_SIGNED), strength reduction to UDIV, and SDIVREM formation.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
    return C;

  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);

  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
  // Only MIN_SIGNED / MIN_SIGNED yields 1; any other dividend yields 0.
  if (N1C && N1C->getAPIntValue().isMinSignedValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  // Common div/rem folds (undef operands, 0/X, X/X, X/1, ...).
  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  if (SDValue V = visitSDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
4002 
/// Produce an optimized lowering for (sdiv N0, N1) without creating new div
/// nodes: handles (+/-) power-of-2 constant divisors with a shift sequence and
/// otherwise defers to the target hooks BuildSDIVPow2/BuildSDIV. Returns an
/// empty SDValue if no optimized form applies. Shared by visitSDIV and
/// visitREM.
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);
  unsigned BitWidth = VT.getScalarSizeInBits();

  // Helper for determining whether a value is a power-2 constant scalar or a
  // vector of such elements.
  auto IsPowerOfTwo = [](ConstantSDNode *C) {
    if (C->isNullValue() || C->isOpaque())
      return false;
    if (C->getAPIntValue().isPowerOf2())
      return true;
    // Also accept negated powers of 2; the select/negate at the end of the
    // expansion below handles the sign.
    if ((-C->getAPIntValue()).isPowerOf2())
      return true;
    return false;
  };

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // Create constants that are functions of the shift amount value.
    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
    // log2(|divisor|) via CTTZ, since the divisor is a power of 2.
    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
    if (!isConstantOrConstantVector(Inexact))
      return SDValue();

    // Splat the sign bit into the register
    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
    AddToWorklist(Sign.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    // This biases a negative dividend so the following arithmetic shift
    // rounds toward zero rather than toward negative infinity.
    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
    AddToWorklist(Srl.getNode());
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
    AddToWorklist(Add.getNode());
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
    AddToWorklist(Sra.getNode());

    // Special case: (sdiv X, 1) -> X
    // Special Case: (sdiv X, -1) -> 0-X
    SDValue One = DAG.getConstant(1, DL, VT);
    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
    SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
    SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
    Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);

    // If dividing by a positive value, we're done. Otherwise, the result must
    // be negated.
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);

    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
    SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
    return Res;
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  return SDValue();
}
4083 
/// Combine an ISD::UDIV node: constant folds, the -1 divisor special case,
/// shift-based strength reduction (via visitUDIVLike), and UDIVREM formation.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
    return C;

  // fold (udiv X, -1) -> select(X == -1, 1, 0)
  // Unsigned -1 is the maximum value, so the quotient is 1 only when X == -1.
  if (N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  // Common div/rem folds (undef operands, 0/X, X/X, X/1, ...).
  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (SDValue V = visitUDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
4138 
/// Produce an optimized lowering for (udiv N0, N1) without creating new div
/// nodes: handles power-of-2 divisors (including (shl pow2, y)) with shifts
/// and defers to the target hook BuildUDIV for other constant divisors.
/// Returns an empty SDValue if no optimized form applies. Shared by visitUDIV
/// and visitREM.
SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    // The log2 value may need to be narrowed/extended to the shift type.
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      // Build the shift amount log2(c)+y in the type of the shl's amount.
      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  // Only worthwhile when the target says real division is expensive.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  return SDValue();
}
4181 
// handles ISD::SREM and ISD::UREM
/// Combine a remainder node: constant folds, strength reduction (srem -> urem,
/// urem-by-pow2 -> and), lowering X%C as X - (X/C)*C via the DIVLike helpers,
/// and DIVREM formation.
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
    return C;

  // fold (urem X, -1) -> select(X == -1, 0, x)
  // Unsigned -1 is the maximum value, so the remainder is X unless X == -1.
  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(0, DL, VT), N0);

  // Common div/rem folds (undef operands, 0%X, X%X, X%1, ...).
  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
  // speculative DIV must not cause a DIVREM conversion.  We guard against this
  // by skipping the simplification if isIntDivCheap().  When div is not cheap,
  // combine will not return a DIVREM.  Regardless, checking cheapness here
  // makes sense since the simplification results in fatter code.
  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
    SDValue OptimizedDiv =
        isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
    if (OptimizedDiv.getNode()) {
      // If the equivalent Div node also exists, update its users.
      unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
      if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
                                                { N0, N1 }))
        CombineTo(DivNode, OptimizedDiv);
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(OptimizedDiv.getNode());
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}
4263 
/// Combine an ISD::MULHS (signed multiply returning the high half) node.
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhs x, 0) -> 0
    // do not return N0/N1, because undef node may exist.
    if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
        ISD::isBuildVectorAllZeros(N1.getNode()))
      return DAG.getConstant(0, DL, VT);
  }

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  // The high half of x*1 is just the sign bit of x smeared across the value.
  if (isOneConstant(N1))
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (mulhs x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider
  // multiply plus a shift.
  if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Sign-extend both operands, multiply in the wide type, then shift the
      // high half down and truncate back to VT.
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
4310 
/// Combine an ISD::MULHU (unsigned multiply returning the high half) node.
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhu x, 0) -> 0
    // do not return N0/N1, because undef node may exist.
    if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
        ISD::isBuildVectorAllZeros(N1.getNode()))
      return DAG.getConstant(0, DL, VT);
  }

  // fold (mulhu x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhu x, 1) -> 0
  // The high half of an unsigned x*1 is always zero.
  if (isOneConstant(N1))
    return DAG.getConstant(0, DL, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
  // The high half of x << c is the top c bits of x.
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
    unsigned NumEltBits = VT.getScalarSizeInBits();
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    SDValue SRLAmt = DAG.getNode(
        ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // If the type twice as wide is legal, transform the mulhu to a wider
  // multiply plus a shift.
  if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Zero-extend both operands, multiply in the wide type, then shift the
      // high half down and truncate back to VT.
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
4366 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Returns
/// the simplified value (also installed via CombineTo) if a simplification was
/// made, or an empty SDValue otherwise.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    // Speculatively build the single-result node and see if it combines to
    // something simpler; only keep it if it does.
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    // Same speculative-combine approach for the high half.
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
4415 
/// Combine an ISD::SMUL_LOHI node (signed multiply producing low and high
/// halves as two results).
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the smul_lohi to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // The high result is the top half of the wide product.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // The low result is the truncated wide product.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
4446 
/// Combine an ISD::UMUL_LOHI node (unsigned multiply producing low and high
/// halves as two results).
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // (umul_lohi N0, 0) -> (0, 0)
  if (isNullConstant(N->getOperand(1))) {
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return CombineTo(N, Zero, Zero);
  }

  // (umul_lohi N0, 1) -> (N0, 0)
  if (isOneConstant(N->getOperand(1))) {
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return CombineTo(N, N->getOperand(0), Zero);
  }

  // If the type twice as wide is legal, transform the umul_lohi to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // The high result is the top half of the wide product.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // The low result is the truncated wide product.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
4489 
4490 SDValue DAGCombiner::visitMULO(SDNode *N) {
4491   SDValue N0 = N->getOperand(0);
4492   SDValue N1 = N->getOperand(1);
4493   EVT VT = N0.getValueType();
4494   bool IsSigned = (ISD::SMULO == N->getOpcode());
4495 
4496   EVT CarryVT = N->getValueType(1);
4497   SDLoc DL(N);
4498 
4499   // canonicalize constant to RHS.
4500   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4501       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4502     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4503 
4504   // fold (mulo x, 0) -> 0 + no carry out
4505   if (isNullOrNullSplat(N1))
4506     return CombineTo(N, DAG.getConstant(0, DL, VT),
4507                      DAG.getConstant(0, DL, CarryVT));
4508 
4509   // (mulo x, 2) -> (addo x, x)
4510   if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
4511     if (C2->getAPIntValue() == 2)
4512       return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4513                          N->getVTList(), N0, N0);
4514 
4515   return SDValue();
4516 }
4517 
4518 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4519   SDValue N0 = N->getOperand(0);
4520   SDValue N1 = N->getOperand(1);
4521   EVT VT = N0.getValueType();
4522   unsigned Opcode = N->getOpcode();
4523 
4524   // fold vector ops
4525   if (VT.isVector())
4526     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4527       return FoldedVOp;
4528 
4529   // fold operation with constant operands.
4530   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4531     return C;
4532 
4533   // canonicalize constant to RHS
4534   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4535       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4536     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4537 
4538   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4539   // Only do this if the current op isn't legal and the flipped is.
4540   if (!TLI.isOperationLegal(Opcode, VT) &&
4541       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4542       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4543     unsigned AltOpcode;
4544     switch (Opcode) {
4545     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4546     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4547     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4548     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4549     default: llvm_unreachable("Unknown MINMAX opcode");
4550     }
4551     if (TLI.isOperationLegal(AltOpcode, VT))
4552       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4553   }
4554 
4555   return SDValue();
4556 }
4557 
/// If this is a bitwise logic instruction (AND/OR/XOR) and both operands have
/// the same opcode, try to sink the other opcode after the logic instruction:
///   logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
/// This trades two "hand" ops for one, at the cost of possibly widening or
/// narrowing the type the logic op executes in.
SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned LogicOpcode = N->getOpcode();
  unsigned HandOpcode = N0.getOpcode();
  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
          LogicOpcode == ISD::XOR) && "Expected logic opcode");
  assert(HandOpcode == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply: every transform below
  // reads operand 0 of each hand op.
  if (N0.getNumOperands() == 0)
    return SDValue();

  // FIXME: We should check number of uses of the operands to not increase
  //        the instruction count for all transforms.

  // Handle size-changing casts: hoist the logic op into the narrower
  // pre-extension type.
  SDValue X = N0.getOperand(0);
  SDValue Y = N1.getOperand(0);
  EVT XVT = X.getValueType();
  SDLoc DL(N);
  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
      HandOpcode == ISD::SIGN_EXTEND) {
    // If both operands have other uses, this transform would create extra
    // instructions without eliminating anything.
    if (!N0.hasOneUse() && !N1.hasOneUse())
      return SDValue();
    // We need matching integer source types.
    if (XVT != Y.getValueType())
      return SDValue();
    // Don't create an illegal op during or after legalization. Don't ever
    // create an unsupported vector op.
    if ((VT.isVector() || LegalOperations) &&
        !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
      return SDValue();
    // Avoid infinite looping with PromoteIntBinOp.
    // TODO: Should we apply desirable/legal constraints to all opcodes?
    if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
        !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
      return SDValue();
    // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
  if (HandOpcode == ISD::TRUNCATE) {
    // If both operands have other uses, this transform would create extra
    // instructions without eliminating anything.
    if (!N0.hasOneUse() && !N1.hasOneUse())
      return SDValue();
    // We need matching source types.
    if (XVT != Y.getValueType())
      return SDValue();
    // Don't create an illegal op during or after legalization.
    if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
      return SDValue();
    // Be extra careful sinking truncate. If it's free, there's no benefit in
    // widening a binop. Also, don't create a logic op on an illegal type.
    if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
      return SDValue();
    if (!TLI.isTypeLegal(XVT))
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // For binops SHL/SRL/SRA/AND:
  //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
  // The second operand z must be identical on both sides.
  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
       HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    // If either operand has other uses, this transform is not an improvement.
    if (!N0.hasOneUse() || !N1.hasOneUse())
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
  }

  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
  if (HandOpcode == ISD::BSWAP) {
    // If either operand has other uses, this transform is not an improvement.
    if (!N0.hasOneUse() || !N1.hasOneUse())
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
    // Input types must be integer and the same.
    // Also reject turning a legal vector op into an illegal scalar one.
    if (XVT.isInteger() && XVT == Y.getValueType() &&
        !(VT.isVector() && TLI.isTypeLegal(VT) &&
          !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
      return DAG.getNode(HandOpcode, DL, VT, Logic);
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
    auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
    assert(X.getValueType() == Y.getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
        !SVN0->getMask().equals(SVN1->getMask()))
      return SDValue();

    // Don't try to fold this node if it requires introducing a
    // build vector of all zeros that might be illegal at this stage.
    // (xor of a shared operand with itself would need a zero vector here.)
    SDValue ShOp = N0.getOperand(1);
    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

    // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
    if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
                                  N0.getOperand(0), N1.getOperand(0));
      return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
    }

    // Don't try to fold this node if it requires introducing a
    // build vector of all zeros that might be illegal at this stage.
    ShOp = N0.getOperand(0);
    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

    // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
    if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
                                  N1.getOperand(1));
      return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
    }
  }

  return SDValue();
}
4721 
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
/// \p IsAnd selects between the 'and' and 'or' forms of each fold; \p N0 and
/// \p N1 are the two setcc-equivalent operands of the logic op.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT.getScalarType() != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Both compares test against the same RHS with the same predicate:
  // merge the LHS values with a single or/and and compare once.
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullOrNullSplat(LR);
    bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  // The width check rules out i1, where 0 and -1 are the same value.
  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
      IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }

    // Turn compare of constants whose difference is 1 bit into add+and+setcc.
    // TODO - support non-uniform vector amounts.
    if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
      // Match a shared variable operand and 2 non-opaque constant operands.
      ConstantSDNode *C0 = isConstOrConstSplat(LR);
      ConstantSDNode *C1 = isConstOrConstSplat(RR);
      if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
        // Canonicalize larger constant as C0.
        if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
          std::swap(C0, C1);

        // The difference of the constants must be a single bit.
        const APInt &C0Val = C0->getAPIntValue();
        const APInt &C1Val = C1->getAPIntValue();
        if ((C0Val - C1Val).isPowerOf2()) {
          // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
          // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
          SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
          SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
          SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
          SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
          SDValue Zero = DAG.getConstant(0, DL, OpVT);
          return DAG.getSetCC(DL, VT, And, Zero, CC0);
        }
      }
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
                                : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
4868 
4869 /// This contains all DAGCombine rules which reduce two values combined by
4870 /// an And operation to a single value. This makes them reusable in the context
4871 /// of visitSELECT(). Rules involving constants are not included as
4872 /// visitSELECT() already handles those cases.
4873 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4874   EVT VT = N1.getValueType();
4875   SDLoc DL(N);
4876 
4877   // fold (and x, undef) -> 0
4878   if (N0.isUndef() || N1.isUndef())
4879     return DAG.getConstant(0, DL, VT);
4880 
4881   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4882     return V;
4883 
4884   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4885       VT.getSizeInBits() <= 64) {
4886     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4887       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4888         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4889         // immediate for an add, but it is legal if its top c2 bits are set,
4890         // transform the ADD so the immediate doesn't need to be materialized
4891         // in a register.
4892         APInt ADDC = ADDI->getAPIntValue();
4893         APInt SRLC = SRLI->getAPIntValue();
4894         if (ADDC.getMinSignedBits() <= 64 &&
4895             SRLC.ult(VT.getSizeInBits()) &&
4896             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4897           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4898                                              SRLC.getZExtValue());
4899           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4900             ADDC |= Mask;
4901             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4902               SDLoc DL0(N0);
4903               SDValue NewAdd =
4904                 DAG.getNode(ISD::ADD, DL0, VT,
4905                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4906               CombineTo(N0.getNode(), NewAdd);
4907               // Return N so it doesn't get rechecked!
4908               return SDValue(N, 0);
4909             }
4910           }
4911         }
4912       }
4913     }
4914   }
4915 
4916   // Reduce bit extract of low half of an integer to the narrower type.
4917   // (and (srl i64:x, K), KMask) ->
4918   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4919   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4920     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4921       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4922         unsigned Size = VT.getSizeInBits();
4923         const APInt &AndMask = CAnd->getAPIntValue();
4924         unsigned ShiftBits = CShift->getZExtValue();
4925 
4926         // Bail out, this node will probably disappear anyway.
4927         if (ShiftBits == 0)
4928           return SDValue();
4929 
4930         unsigned MaskBits = AndMask.countTrailingOnes();
4931         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4932 
4933         if (AndMask.isMask() &&
4934             // Required bits must not span the two halves of the integer and
4935             // must fit in the half size type.
4936             (ShiftBits + MaskBits <= Size / 2) &&
4937             TLI.isNarrowingProfitable(VT, HalfVT) &&
4938             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4939             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4940             TLI.isTruncateFree(VT, HalfVT) &&
4941             TLI.isZExtFree(HalfVT, VT)) {
4942           // The isNarrowingProfitable is to avoid regressions on PPC and
4943           // AArch64 which match a few 64-bit bit insert / bit extract patterns
4944           // on downstream users of this. Those patterns could probably be
4945           // extended to handle extensions mixed in.
4946 
4947           SDValue SL(N0);
4948           assert(MaskBits <= Size);
4949 
4950           // Extracting the highest bit of the low half.
4951           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4952           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4953                                       N0.getOperand(0));
4954 
4955           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4956           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4957           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4958           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4959           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4960         }
4961       }
4962     }
4963   }
4964 
4965   return SDValue();
4966 }
4967 
4968 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4969                                    EVT LoadResultTy, EVT &ExtVT) {
4970   if (!AndC->getAPIntValue().isMask())
4971     return false;
4972 
4973   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4974 
4975   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4976   EVT LoadedVT = LoadN->getMemoryVT();
4977 
4978   if (ExtVT == LoadedVT &&
4979       (!LegalOperations ||
4980        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4981     // ZEXTLOAD will match without needing to change the size of the value being
4982     // loaded.
4983     return true;
4984   }
4985 
4986   // Do not change the width of a volatile or atomic loads.
4987   if (!LoadN->isSimple())
4988     return false;
4989 
4990   // Do not generate loads of non-round integer types since these can
4991   // be expensive (and would be wrong if the type is not byte sized).
4992   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4993     return false;
4994 
4995   if (LegalOperations &&
4996       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4997     return false;
4998 
4999   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5000     return false;
5001 
5002   return true;
5003 }
5004 
5005 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5006                                     ISD::LoadExtType ExtType, EVT &MemVT,
5007                                     unsigned ShAmt) {
5008   if (!LDST)
5009     return false;
5010   // Only allow byte offsets.
5011   if (ShAmt % 8)
5012     return false;
5013 
5014   // Do not generate loads of non-round integer types since these can
5015   // be expensive (and would be wrong if the type is not byte sized).
5016   if (!MemVT.isRound())
5017     return false;
5018 
5019   // Don't change the width of a volatile or atomic loads.
5020   if (!LDST->isSimple())
5021     return false;
5022 
5023   // Verify that we are actually reducing a load width here.
5024   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
5025     return false;
5026 
5027   // Ensure that this isn't going to produce an unsupported memory access.
5028   if (ShAmt) {
5029     assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5030     const unsigned ByteShAmt = ShAmt / 8;
5031     const Align LDSTAlign = LDST->getAlign();
5032     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5033     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5034                                 LDST->getAddressSpace(), NarrowAlign,
5035                                 LDST->getMemOperand()->getFlags()))
5036       return false;
5037   }
5038 
5039   // It's not possible to generate a constant of extended or untyped type.
5040   EVT PtrType = LDST->getBasePtr().getValueType();
5041   if (PtrType == MVT::Untyped || PtrType.isExtended())
5042     return false;
5043 
5044   if (isa<LoadSDNode>(LDST)) {
5045     LoadSDNode *Load = cast<LoadSDNode>(LDST);
5046     // Don't transform one with multiple uses, this would require adding a new
5047     // load.
5048     if (!SDValue(Load, 0).hasOneUse())
5049       return false;
5050 
5051     if (LegalOperations &&
5052         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5053       return false;
5054 
5055     // For the transform to be legal, the load must produce only two values
5056     // (the value loaded and the chain).  Don't transform a pre-increment
5057     // load, for example, which produces an extra value.  Otherwise the
5058     // transformation is not equivalent, and the downstream logic to replace
5059     // uses gets things wrong.
5060     if (Load->getNumValues() > 2)
5061       return false;
5062 
5063     // If the load that we're shrinking is an extload and we're not just
5064     // discarding the extension we can't simply shrink the load. Bail.
5065     // TODO: It would be possible to merge the extensions in some cases.
5066     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5067         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5068       return false;
5069 
5070     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5071       return false;
5072   } else {
5073     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5074     StoreSDNode *Store = cast<StoreSDNode>(LDST);
5075     // Can't write outside the original store
5076     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5077       return false;
5078 
5079     if (LegalOperations &&
5080         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5081       return false;
5082   }
5083   return true;
5084 }
5085 
/// Recursively walk the operand tree of the AND rooted at \p N, collecting
/// loads that can be narrowed to the width of \p Mask into \p Loads, logic
/// nodes whose constant operands exceed the mask into \p NodesWithConsts,
/// and at most one other node (recorded in \p NodeToMask) that must be
/// explicitly re-masked. Returns false if any operand makes the transform
/// unprofitable or unsafe.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallVectorImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (SDValue Op : N->op_values()) {
    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      // Only or/xor constants wider than the mask need narrowing; an 'and'
      // constant can only clear bits the mask would clear anyway.
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    // A multi-use operand cannot be rewritten without affecting other users.
    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.push_back(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      // For AssertZext the asserted narrow type bounds the value; for
      // zero_extend the source operand's type does.
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      // Otherwise fall through to the generic "mask this node" handling.
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      // Recurse into nested logic ops; they are transparent to the mask.
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will be masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        // Glue/chain results don't count as data results.
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}
5174 
/// Try to remove the AND node \p N by propagating its low-bit mask backwards
/// through the operand tree: narrow the loads feeding it, shrink oversized
/// constants, and mask at most one other node. Returns true if the transform
/// was performed (N is replaced by its first operand).
bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask)
    return false;

  // Only contiguous low-bit masks can be absorbed into zextloads.
  if (!Mask->getAPIntValue().isMask())
    return false;

  // No need to do anything if the and directly uses a load.
  if (isa<LoadSDNode>(N->getOperand(0)))
    return false;

  SmallVector<LoadSDNode*, 8> Loads;
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
  SDNode *FixupNode = nullptr;
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    // Without at least one narrowable load the transform has no payoff.
    if (Loads.size() == 0)
      return false;

    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);

    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      // The RAUW above also rewired the new And's own operand to itself;
      // restore its operands so it still reads the original node.
      if (And.getOpcode() == ISD ::AND)
        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
    }

    // Narrow any constants that need it.
    for (auto *LogicN : NodesWithConsts) {
      SDValue Op0 = LogicN->getOperand(0);
      SDValue Op1 = LogicN->getOperand(1);

      // Put the constant operand in Op1 so it gets masked.
      if (isa<ConstantSDNode>(Op0))
          std::swap(Op0, Op1);

      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
                                Op1, MaskOp);

      DAG.UpdateNodeOperands(LogicN, Op0, And);
    }

    // Create narrow loads.
    for (auto *Load : Loads) {
      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      // As above: undo the self-replacement inside the new And node.
      if (And.getOpcode() == ISD ::AND)
        And = SDValue(
            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
      SDValue NewLoad = ReduceLoadWidth(And.getNode());
      assert(NewLoad &&
             "Shouldn't be masking the load if it can't be narrowed");
      CombineTo(Load, NewLoad, NewLoad.getValue(1));
    }
    // The mask is now redundant; replace the AND with its first operand.
    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    return true;
  }
  return false;
}
5242 
5243 // Unfold
5244 //    x &  (-1 'logical shift' y)
5245 // To
5246 //    (x 'opposite logical shift' y) 'logical shift' y
5247 // if it is better for performance.
5248 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5249   assert(N->getOpcode() == ISD::AND);
5250 
5251   SDValue N0 = N->getOperand(0);
5252   SDValue N1 = N->getOperand(1);
5253 
5254   // Do we actually prefer shifts over mask?
5255   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5256     return SDValue();
5257 
5258   // Try to match  (-1 '[outer] logical shift' y)
5259   unsigned OuterShift;
5260   unsigned InnerShift; // The opposite direction to the OuterShift.
5261   SDValue Y;           // Shift amount.
5262   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5263     if (!M.hasOneUse())
5264       return false;
5265     OuterShift = M->getOpcode();
5266     if (OuterShift == ISD::SHL)
5267       InnerShift = ISD::SRL;
5268     else if (OuterShift == ISD::SRL)
5269       InnerShift = ISD::SHL;
5270     else
5271       return false;
5272     if (!isAllOnesConstant(M->getOperand(0)))
5273       return false;
5274     Y = M->getOperand(1);
5275     return true;
5276   };
5277 
5278   SDValue X;
5279   if (matchMask(N1))
5280     X = N0;
5281   else if (matchMask(N0))
5282     X = N1;
5283   else
5284     return SDValue();
5285 
5286   SDLoc DL(N);
5287   EVT VT = N->getValueType(0);
5288 
5289   //     tmp = x   'opposite logical shift' y
5290   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5291   //     ret = tmp 'logical shift' y
5292   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5293 
5294   return T1;
5295 }
5296 
5297 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5298 /// For a target with a bit test, this is expected to become test + set and save
5299 /// at least 1 instruction.
5300 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5301   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5302 
5303   // This is probably not worthwhile without a supported type.
5304   EVT VT = And->getValueType(0);
5305   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5306   if (!TLI.isTypeLegal(VT))
5307     return SDValue();
5308 
5309   // Look through an optional extension and find a 'not'.
5310   // TODO: Should we favor test+set even without the 'not' op?
5311   SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5312   if (Not.getOpcode() == ISD::ANY_EXTEND)
5313     Not = Not.getOperand(0);
5314   if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5315     return SDValue();
5316 
5317   // Look though an optional truncation. The source operand may not be the same
5318   // type as the original 'and', but that is ok because we are masking off
5319   // everything but the low bit.
5320   SDValue Srl = Not.getOperand(0);
5321   if (Srl.getOpcode() == ISD::TRUNCATE)
5322     Srl = Srl.getOperand(0);
5323 
5324   // Match a shift-right by constant.
5325   if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5326       !isa<ConstantSDNode>(Srl.getOperand(1)))
5327     return SDValue();
5328 
5329   // We might have looked through casts that make this transform invalid.
5330   // TODO: If the source type is wider than the result type, do the mask and
5331   //       compare in the source type.
5332   const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5333   unsigned VTBitWidth = VT.getSizeInBits();
5334   if (ShiftAmt.uge(VTBitWidth))
5335     return SDValue();
5336 
5337   // Turn this into a bit-test pattern using mask op + setcc:
5338   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5339   SDLoc DL(And);
5340   SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5341   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5342   SDValue Mask = DAG.getConstant(
5343       APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5344   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5345   SDValue Zero = DAG.getConstant(0, DL, VT);
5346   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5347   return DAG.getZExtOrTrunc(Setcc, DL, VT);
5348 }
5349 
/// Try all DAG combines specific to ISD::AND. Returns a replacement value,
/// SDValue(N, 0) when N was updated in place via CombineTo, or a null
/// SDValue when no fold applied.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x & x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
                             SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
                             SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;

    // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
    auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
    auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
    if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
        N0.hasOneUse() && N1.hasOneUse()) {
      EVT LoadVT = MLoad->getMemoryVT();
      EVT ExtVT = VT;
      if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
        // For this AND to be a zero extension of the masked load the elements
        // of the BuildVec must mask the bottom bits of the extended element
        // type
        if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
          uint64_t ElementSize =
              LoadVT.getVectorElementType().getScalarSizeInBits();
          if (Splat->getAPIntValue().isMask(ElementSize)) {
            // Re-issue the masked load as a zero-extending load of the same
            // memory type; the AND becomes redundant.
            return DAG.getMaskedLoad(
                ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
                MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
                LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
                ISD::ZEXTLOAD, MLoad->isExpandingLoad());
          }
        }
      }
    }
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);

  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;

  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate and
  if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
    return RAND;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (VT.isVector())
    if (SDValue Shuffle = XformToShuffleWithZero(N))
      return Shuffle;

  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
    return Combined;

  // fold (and (or x, C), D) -> D if (C & D) == D
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::OR &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    // The bits the AND clears must already be known zero in the narrow value.
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things.  This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (EltBitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
               SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if ((SplatBitSize % EltBitWidth) == 0) {
          Constant = APInt::getAllOnesValue(EltBitWidth);
          for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
            Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    // B is true when the AND can be removed for this load's extension kind
    // (possibly after converting an EXTLOAD to a ZEXTLOAD below).
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    if (SDValue Res = ReduceLoadWidth(N)) {
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
      AddToWorklist(N);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
      return SDValue(N, 0);
    }
  }

  if (LegalTypes) {
    // Attempt to propagate the AND back up to the leaves which, if they're
    // loads, can be combined to narrow loads and the AND node can be removed.
    // Perform after legalization so that extend nodes will already be
    // combined into the loads.
    if (BackwardsPropagateMask(N))
      return SDValue(N, 0);
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    if (isNullOrNullSplat(N0.getOperand(0))) {
      SDValue SubRHS = N0.getOperand(1);
      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return SubRHS;
      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
      (ISD::isEXTLoad(N0.getNode()) ||
       (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned ExtBitSize = N1.getScalarValueSizeInBits();
    unsigned MemBitSize = MemVT.getScalarSizeInBits();
    APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
    if (DAG.MaskedValueIsZero(N1, ExtBits) &&
        ((!LegalOperations && LN0->isSimple()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad =
          DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
                         LN0->getBasePtr(), MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
    return Shifts;

  if (TLI.hasBitTest(N0, N1))
    if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
      return V;

  return SDValue();
}
5668 
5669 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// N is the OR node; N0/N1 are its operands. When \p DemandHighBits is true,
/// bits above the low 16 of the result are demanded, so the pattern must
/// provably zero them; callers that only use the low halfword pass false.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  // LookPassAnd0/LookPassAnd1 record that an outer AND mask was matched and
  // stripped from N0/N1 respectively; that matters for the high-bit checks
  // below.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  // Canonicalize so that an AND-of-SHL (if any) ends up in N0 and an
  // AND-of-SRL in N1.
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
      std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
      std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    // This is needed for X86.
    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
                  N01C->getZExtValue() != 0xFFFF))
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // Now N0 must be the SHL and N1 the SRL, both single-use, each by 8.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  // i.e. the AND may also appear inside the shift instead of outside it.
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    // Also allow 0xFFFF since the bits will be shifted out. This is needed
    // for X86.
    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
                   N101C->getZExtValue() != 0xFFFF))
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both shifted values must originate from the same node.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Emit (bswap a); for wider types the halfword ends up at the top, so
  // shift it back down into the low 16 bits.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
5781 
5782 /// Return true if the specified node is an element that makes up a 32-bit
5783 /// packed halfword byteswap.
5784 /// ((x & 0x000000ff) << 8) |
5785 /// ((x & 0x0000ff00) >> 8) |
5786 /// ((x & 0x00ff0000) << 8) |
5787 /// ((x & 0xff000000) >> 8)
/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// On success, records the source node of the matched byte in
/// Parts[MaskByteOffset]; fails if that slot is already occupied.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
  if (!N.getNode()->hasOneUse())
    return false;

  // The element is either an AND of a shift, or a shift of an AND.
  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  SDValue N0 = N.getOperand(0);
  unsigned Opc0 = N0.getOpcode();
  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
    return false;

  ConstantSDNode *N1C = nullptr;
  // SHL or SRL: look upstream for AND mask operand
  if (Opc == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  else if (Opc0 == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (!N1C)
    return false;

  // Map the mask constant to which byte of the result this element produces.
  unsigned MaskByteOffset;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       MaskByteOffset = 0; break;
  case 0xFF00:     MaskByteOffset = 1; break;
  case 0xFFFF:
    // In case demanded bits didn't clear the bits that will be shifted out.
    // This is needed for X86.
    if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
      MaskByteOffset = 1;
      break;
    }
    return false;
  case 0xFF0000:   MaskByteOffset = 2; break;
  case 0xFF000000: MaskByteOffset = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  // In every case the inner shift amount must be exactly 8 and its direction
  // must agree with which byte the mask selects.
  if (Opc == ISD::AND) {
    if (MaskByteOffset == 0 || MaskByteOffset == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (Opc0 != ISD::SRL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (Opc0 != ISD::SHL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (MaskByteOffset != 0 && MaskByteOffset != 2)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (MaskByteOffset != 1 && MaskByteOffset != 3)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  // Each result byte may be produced only once.
  if (Parts[MaskByteOffset])
    return false;

  // Record the value that feeds this element (the operand below the
  // inner AND/shift).
  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
  return true;
}
5871 
5872 // Match 2 elements of a packed halfword bswap.
5873 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
5874   if (N.getOpcode() == ISD::OR)
5875     return isBSwapHWordElement(N.getOperand(0), Parts) &&
5876            isBSwapHWordElement(N.getOperand(1), Parts);
5877 
5878   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
5879     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
5880     if (!C || C->getAPIntValue() != 16)
5881       return false;
5882     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
5883     return true;
5884   }
5885 
5886   return false;
5887 }
5888 
5889 // Match this pattern:
5890 //   (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
5891 // And rewrite this to:
5892 //   (rotr (bswap A), 16)
5893 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
5894                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
5895                                        SDValue N1, EVT VT, EVT ShiftAmountTy) {
5896   assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
5897          "MatchBSwapHWordOrAndAnd: expecting i32");
5898   if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5899     return SDValue();
5900   if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
5901     return SDValue();
5902   // TODO: this is too restrictive; lifting this restriction requires more tests
5903   if (!N0->hasOneUse() || !N1->hasOneUse())
5904     return SDValue();
5905   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
5906   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
5907   if (!Mask0 || !Mask1)
5908     return SDValue();
5909   if (Mask0->getAPIntValue() != 0xff00ff00 ||
5910       Mask1->getAPIntValue() != 0x00ff00ff)
5911     return SDValue();
5912   SDValue Shift0 = N0.getOperand(0);
5913   SDValue Shift1 = N1.getOperand(0);
5914   if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
5915     return SDValue();
5916   ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
5917   ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
5918   if (!ShiftAmt0 || !ShiftAmt1)
5919     return SDValue();
5920   if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
5921     return SDValue();
5922   if (Shift0.getOperand(0) != Shift1.getOperand(0))
5923     return SDValue();
5924 
5925   SDLoc DL(N);
5926   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
5927   SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
5928   return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5929 }
5930 
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
///
/// \p N is the OR node being visited; \p N0 / \p N1 are its operands.
/// Returns the replacement node, or an empty SDValue if no match.
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  // Run only post-legalization: before that, the generic bswap matching is
  // preferred and BSWAP legality below would not be meaningful.
  if (!LegalOperations)
    return SDValue();

  // The masks/shifts handled here are hard-coded for i32.
  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // First try the two-operand "(and (srl X, 8), mask) | (and (shl X, 8),
  // mask)" form handled by the helper.
  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
                                              getShiftAmountTy(VT)))
  return BSwap;

  // Try again with commuted operands.
  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
                                              getShiftAmountTy(VT)))
  return BSwap;


  // Look for either
  // (or (bswaphpair), (bswaphpair))
  // (or (or (bswaphpair), (and)), (and))
  // (or (or (and), (bswaphpair)), (and))
  // Parts[] collects the source node for each of the four byte positions;
  // each isBSwapHWordElement/Pair call fills in slots as it matches.
  SDNode *Parts[4] = {};

  if (isBSwapHWordPair(N0, Parts)) {
    // (or (or (and), (and)), (or (and), (and)))
    if (!isBSwapHWordPair(N1, Parts))
      return SDValue();
  } else if (N0.getOpcode() == ISD::OR) {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    // The halfword pair may sit on either side of the inner OR.
    if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
        !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
      return SDValue();
  } else
    return SDValue();

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do  (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}
5998 
5999 /// This contains all DAGCombine rules which reduce two values combined by
6000 /// an Or operation to a single value \see visitANDLike().
6001 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6002   EVT VT = N1.getValueType();
6003   SDLoc DL(N);
6004 
6005   // fold (or x, undef) -> -1
6006   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6007     return DAG.getAllOnesConstant(DL, VT);
6008 
6009   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6010     return V;
6011 
6012   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
6013   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6014       // Don't increase # computations.
6015       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6016     // We can only do this xform if we know that bits from X that are set in C2
6017     // but not in C1 are already zero.  Likewise for Y.
6018     if (const ConstantSDNode *N0O1C =
6019         getAsNonOpaqueConstant(N0.getOperand(1))) {
6020       if (const ConstantSDNode *N1O1C =
6021           getAsNonOpaqueConstant(N1.getOperand(1))) {
6022         // We can only do this xform if we know that bits from X that are set in
6023         // C2 but not in C1 are already zero.  Likewise for Y.
6024         const APInt &LHSMask = N0O1C->getAPIntValue();
6025         const APInt &RHSMask = N1O1C->getAPIntValue();
6026 
6027         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6028             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6029           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6030                                   N0.getOperand(0), N1.getOperand(0));
6031           return DAG.getNode(ISD::AND, DL, VT, X,
6032                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
6033         }
6034       }
6035     }
6036   }
6037 
6038   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6039   if (N0.getOpcode() == ISD::AND &&
6040       N1.getOpcode() == ISD::AND &&
6041       N0.getOperand(0) == N1.getOperand(0) &&
6042       // Don't increase # computations.
6043       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6044     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6045                             N0.getOperand(1), N1.getOperand(1));
6046     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6047   }
6048 
6049   return SDValue();
6050 }
6051 
6052 /// OR combines for which the commuted variant will be tried as well.
6053 static SDValue visitORCommutative(
6054     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6055   EVT VT = N0.getValueType();
6056   if (N0.getOpcode() == ISD::AND) {
6057     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6058     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6059       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6060 
6061     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6062     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6063       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6064   }
6065 
6066   return SDValue();
6067 }
6068 
/// Main combine entry point for ISD::OR nodes. Tries, in order: trivial
/// identities, vector-specific folds, constant folding/canonicalization,
/// mask-merging folds, bswap/rotate/funnel-shift idiom recognition, load
/// combining, demanded-bits simplification, and OR-as-ADD combines.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the other side is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          SDValue LegalShuffle =
              TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
                                          Mask, DAG);
          if (LegalShuffle)
            return LegalShuffle;
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  // N1C is also used by the "x | c -> c" fold further below.
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);

  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0 or c1/c2 are undef.
  auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
    // Null constants mean undef lanes; treat those as intersecting.
    return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
    if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
                                                 {N1, N0.getOperand(1)})) {
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
      AddToWorklist(IOR.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    }
  }

  // Try the and-not folds with both operand orders.
  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
    return Combined;
  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
    return Combined;

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // See if this is some rotate idiom.
  if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
    return Rot;

  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If OR can be rewritten into ADD, try combines based on ADD.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    if (SDValue Combined = visitADDLike(N))
      return Combined;

  return SDValue();
}
6246 
6247 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6248   if (Op.getOpcode() == ISD::AND &&
6249       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6250     Mask = Op.getOperand(1);
6251     return Op.getOperand(0);
6252   }
6253   return Op;
6254 }
6255 
6256 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
6257 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6258                             SDValue &Mask) {
6259   Op = stripConstantMask(DAG, Op, Mask);
6260   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6261     Shift = Op;
6262     return true;
6263   }
6264   return false;
6265 }
6266 
/// Helper function for visitOR to extract the needed side of a rotate idiom
/// from a shl/srl/mul/udiv.  This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
///   (or (add v v) (shrl v bitwidth-1)):
///     expands (add v v) -> (shl v 1)
///
///   (or (mul v c0) (shrl (mul v c1) c2)):
///     expands (mul v c0) -> (shl (mul v c1) c3)
///
///   (or (udiv v c0) (shl (udiv v c1) c2)):
///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
///
///   (or (shl v c0) (shrl (shl v c1) c2)):
///     expands (shl v c0) -> (shl (shl v c1) c3)
///
///   (or (shrl v c0) (shl (shrl v c1) c2)):
///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
                                     SDValue ExtractFrom, SDValue &Mask,
                                     const SDLoc &DL) {
  assert(OppShift && ExtractFrom && "Empty SDValue");
  assert(
      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
      "Existing shift must be valid as a rotate half");

  // Peel off an optional constant AND mask; it is reported back to the caller
  // through Mask so it can be re-applied to the rotate result.
  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);

  // Value and Type of the shift.
  SDValue OppShiftLHS = OppShift.getOperand(0);
  EVT ShiftedVT = OppShiftLHS.getValueType();

  // Amount of the existing shift.
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));

  // (add v v) -> (shl v 1)
  // TODO: Should this be a general DAG canonicalization?
  if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
      ExtractFrom.getOpcode() == ISD::ADD &&
      ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
      ExtractFrom.getOperand(0) == OppShiftLHS &&
      OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
    return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
                       DAG.getShiftAmountConstant(1, ShiftedVT, DL));

  // Preconditions:
  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
  //
  // Find opcode of the needed shift to be extracted from (op0 v c0).
  unsigned Opcode = ISD::DELETED_NODE;
  bool IsMulOrDiv = false;
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
  // opcode or its arithmetic (mul or udiv) variant.
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
      return false;
    Opcode = NeededShift;
    return true;
  };
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
  // that the needed shift can be extracted from.
  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    return SDValue();

  // op0 must be the same opcode on both sides, have the same LHS argument,
  // and produce the same value type.
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
      ShiftedVT != ExtractFrom.getValueType())
    return SDValue();

  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
  ConstantSDNode *ExtractFromCst =
      isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these values
  // Check that we have constant values.  Zero amounts are rejected because
  // the arithmetic below assumes non-trivial shifts.
  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    return SDValue();

  // Compute the shift amount we need to extract to complete the rotate.
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    return SDValue();
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);

  // Now try extract the needed shift from the ExtractFrom op and see if the
  // result matches up with the existing shift's LHS op.
  if (IsMulOrDiv) {
    // Op to extract from is a mul or udiv by a constant.
    // Check:
    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
                                                 NeededShiftAmt.getZExtValue());
    APInt ResultAmt;
    APInt Rem;
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    if (Rem != 0 || ResultAmt != OppLHSAmt)
      return SDValue();
  } else {
    // Op to extract from is a shift by a constant.
    // Check:
    //      c2 - (bitwidth(op0 v c0) - c1) == c0
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
                                          ExtractFromAmt.getBitWidth()))
      return SDValue();
  }

  // Return the expanded shift op that should allow a rotate to be formed.
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
  EVT ResVT = ExtractFrom.getValueType();
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}
6397 
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
//
// \p Pos and \p Neg are the two candidate shift amounts; \p EltSize is the
// scalar bit width of the shifted value.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
      unsigned Bits = Log2_64(EltSize);
      // The AND is redundant (its mask covers at least the low Bits bits of
      // a value already known to fit), so we may strip it.
      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
           MaskLoBits))
        Pos = Pos.getOperand(0);
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  //
  // We also need to account for a potential truncation of NegOp1 if the amount
  // has already been legalized to a shift amount type.
  APInt Width;
  if ((Pos == NegOp1) ||
      (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}
6513 
6514 // A subroutine of MatchRotate used once we have found an OR of two opposite
6515 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
6516 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6517 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6518 // Neg with outer conversions stripped away.
6519 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6520                                        SDValue Neg, SDValue InnerPos,
6521                                        SDValue InnerNeg, unsigned PosOpcode,
6522                                        unsigned NegOpcode, const SDLoc &DL) {
6523   // fold (or (shl x, (*ext y)),
6524   //          (srl x, (*ext (sub 32, y)))) ->
6525   //   (rotl x, y) or (rotr x, (sub 32, y))
6526   //
6527   // fold (or (shl x, (*ext (sub 32, y))),
6528   //          (srl x, (*ext y))) ->
6529   //   (rotr x, y) or (rotl x, (sub 32, y))
6530   EVT VT = Shifted.getValueType();
6531   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
6532     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6533     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6534                        HasPos ? Pos : Neg);
6535   }
6536 
6537   return SDValue();
6538 }
6539 
// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of N0 + N1.  If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
// former being preferred if supported.  InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
// TODO: Merge with MatchRotatePosNeg.
SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
                                       SDValue Neg, SDValue InnerPos,
                                       SDValue InnerNeg, unsigned PosOpcode,
                                       unsigned NegOpcode, const SDLoc &DL) {
  EVT VT = N0.getValueType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // fold (or (shl x0, (*ext y)),
  //          (srl x1, (*ext (sub 32, y)))) ->
  //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
  //
  // fold (or (shl x0, (*ext (sub 32, y))),
  //          (srl x1, (*ext y))) ->
  //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
    bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
                       HasPos ? Pos : Neg);
  }

  // Matching the shift+xor cases, we can't easily use the xor'd shift amount
  // so for now just use the PosOpcode case if its legal.
  // TODO: When can we use the NegOpcode case?
  if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
    // Returns true iff Op is (BinOpc _, Imm) with a constant (splat) RHS.
    auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
      if (Op.getOpcode() != BinOpc)
        return false;
      ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
      return Cst && (Cst->getAPIntValue() == Imm);
    };

    // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
    //   -> (fshl x0, x1, y)
    if (IsBinOpImm(N1, ISD::SRL, 1) &&
        IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
        InnerPos == InnerNeg.getOperand(0) &&
        TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
      return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
    }

    // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
    //   -> (fshr x0, x1, y)
    if (IsBinOpImm(N0, ISD::SHL, 1) &&
        IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
        InnerNeg == InnerPos.getOperand(0) &&
        TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
      return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
    }

    // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
    //   -> (fshr x0, x1, y)
    // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
    if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
        IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
        InnerNeg == InnerPos.getOperand(0) &&
        TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
      return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
    }
  }

  return SDValue();
}
6608 
6609 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
6610 // idioms for rotate, and if the target supports rotation instructions, generate
6611 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
6612 // with different shifted sources.
6613 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6614   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
6615   EVT VT = LHS.getValueType();
6616   if (!TLI.isTypeLegal(VT))
6617     return SDValue();
6618 
6619   // The target must have at least one rotate/funnel flavor.
6620   bool HasROTL = hasOperation(ISD::ROTL, VT);
6621   bool HasROTR = hasOperation(ISD::ROTR, VT);
6622   bool HasFSHL = hasOperation(ISD::FSHL, VT);
6623   bool HasFSHR = hasOperation(ISD::FSHR, VT);
6624   if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
6625     return SDValue();
6626 
6627   // Check for truncated rotate.
6628   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6629       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6630     assert(LHS.getValueType() == RHS.getValueType());
6631     if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6632       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6633     }
6634   }
6635 
6636   // Match "(X shl/srl V1) & V2" where V2 may not be present.
6637   SDValue LHSShift;   // The shift.
6638   SDValue LHSMask;    // AND value if any.
6639   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6640 
6641   SDValue RHSShift;   // The shift.
6642   SDValue RHSMask;    // AND value if any.
6643   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6644 
6645   // If neither side matched a rotate half, bail
6646   if (!LHSShift && !RHSShift)
6647     return SDValue();
6648 
6649   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6650   // side of the rotate, so try to handle that here. In all cases we need to
6651   // pass the matched shift from the opposite side to compute the opcode and
6652   // needed shift amount to extract.  We still want to do this if both sides
6653   // matched a rotate half because one half may be a potential overshift that
6654   // can be broken down (ie if InstCombine merged two shl or srl ops into a
6655   // single one).
6656 
6657   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6658   if (LHSShift)
6659     if (SDValue NewRHSShift =
6660             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6661       RHSShift = NewRHSShift;
6662   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6663   if (RHSShift)
6664     if (SDValue NewLHSShift =
6665             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6666       LHSShift = NewLHSShift;
6667 
6668   // If a side is still missing, nothing else we can do.
6669   if (!RHSShift || !LHSShift)
6670     return SDValue();
6671 
6672   // At this point we've matched or extracted a shift op on each side.
6673 
6674   if (LHSShift.getOpcode() == RHSShift.getOpcode())
6675     return SDValue(); // Shifts must disagree.
6676 
6677   bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
6678   if (!IsRotate && !(HasFSHL || HasFSHR))
6679     return SDValue(); // Requires funnel shift support.
6680 
6681   // Canonicalize shl to left side in a shl/srl pair.
6682   if (RHSShift.getOpcode() == ISD::SHL) {
6683     std::swap(LHS, RHS);
6684     std::swap(LHSShift, RHSShift);
6685     std::swap(LHSMask, RHSMask);
6686   }
6687 
6688   unsigned EltSizeInBits = VT.getScalarSizeInBits();
6689   SDValue LHSShiftArg = LHSShift.getOperand(0);
6690   SDValue LHSShiftAmt = LHSShift.getOperand(1);
6691   SDValue RHSShiftArg = RHSShift.getOperand(0);
6692   SDValue RHSShiftAmt = RHSShift.getOperand(1);
6693 
6694   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
6695   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
6696   // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
6697   // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
6698   // iff C1+C2 == EltSizeInBits
6699   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6700                                         ConstantSDNode *RHS) {
6701     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6702   };
6703   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
6704     SDValue Res;
6705     if (IsRotate && (HasROTL || HasROTR))
6706       Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
6707                         HasROTL ? LHSShiftAmt : RHSShiftAmt);
6708     else
6709       Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
6710                         RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
6711 
6712     // If there is an AND of either shifted operand, apply it to the result.
6713     if (LHSMask.getNode() || RHSMask.getNode()) {
6714       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6715       SDValue Mask = AllOnes;
6716 
6717       if (LHSMask.getNode()) {
6718         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6719         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6720                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6721       }
6722       if (RHSMask.getNode()) {
6723         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
6724         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6725                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
6726       }
6727 
6728       Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
6729     }
6730 
6731     return Res;
6732   }
6733 
6734   // If there is a mask here, and we have a variable shift, we can't be sure
6735   // that we're masking out the right stuff.
6736   if (LHSMask.getNode() || RHSMask.getNode())
6737     return SDValue();
6738 
6739   // If the shift amount is sign/zext/any-extended just peel it off.
6740   SDValue LExtOp0 = LHSShiftAmt;
6741   SDValue RExtOp0 = RHSShiftAmt;
6742   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6743        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6744        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6745        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
6746       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6747        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6748        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6749        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
6750     LExtOp0 = LHSShiftAmt.getOperand(0);
6751     RExtOp0 = RHSShiftAmt.getOperand(0);
6752   }
6753 
6754   if (IsRotate && (HasROTL || HasROTR)) {
6755     SDValue TryL =
6756         MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
6757                           RExtOp0, ISD::ROTL, ISD::ROTR, DL);
6758     if (TryL)
6759       return TryL;
6760 
6761     SDValue TryR =
6762         MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
6763                           LExtOp0, ISD::ROTR, ISD::ROTL, DL);
6764     if (TryR)
6765       return TryR;
6766   }
6767 
6768   SDValue TryL =
6769       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
6770                         LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
6771   if (TryL)
6772     return TryL;
6773 
6774   SDValue TryR =
6775       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
6776                         RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
6777   if (TryR)
6778     return TryR;
6779 
6780   return SDValue();
6781 }
6782 
6783 namespace {
6784 
6785 /// Represents known origin of an individual byte in load combine pattern. The
6786 /// value of the byte is either constant zero or comes from memory.
6787 struct ByteProvider {
6788   // For constant zero providers Load is set to nullptr. For memory providers
6789   // Load represents the node which loads the byte from memory.
6790   // ByteOffset is the offset of the byte in the value produced by the load.
6791   LoadSDNode *Load = nullptr;
6792   unsigned ByteOffset = 0;
6793 
6794   ByteProvider() = default;
6795 
6796   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6797     return ByteProvider(Load, ByteOffset);
6798   }
6799 
6800   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6801 
6802   bool isConstantZero() const { return !Load; }
6803   bool isMemory() const { return Load; }
6804 
6805   bool operator==(const ByteProvider &Other) const {
6806     return Other.Load == Load && Other.ByteOffset == ByteOffset;
6807   }
6808 
6809 private:
6810   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6811       : Load(Load), ByteOffset(ByteOffset) {}
6812 };
6813 
6814 } // end anonymous namespace
6815 
6816 /// Recursively traverses the expression calculating the origin of the requested
6817 /// byte of the given value. Returns None if the provider can't be calculated.
6818 ///
6819 /// For all the values except the root of the expression verifies that the value
6820 /// has exactly one use and if it's not true return None. This way if the origin
6821 /// of the byte is returned it's guaranteed that the values which contribute to
6822 /// the byte are not used outside of this expression.
6823 ///
6824 /// Because the parts of the expression are not allowed to have more than one
6825 /// use this function iterates over trees, not DAGs. So it never visits the same
6826 /// node more than once.
6827 static const Optional<ByteProvider>
6828 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6829                       bool Root = false) {
6830   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
6831   if (Depth == 10)
6832     return None;
6833 
6834   if (!Root && !Op.hasOneUse())
6835     return None;
6836 
6837   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6838   unsigned BitWidth = Op.getValueSizeInBits();
6839   if (BitWidth % 8 != 0)
6840     return None;
6841   unsigned ByteWidth = BitWidth / 8;
6842   assert(Index < ByteWidth && "invalid index requested");
6843   (void) ByteWidth;
6844 
6845   switch (Op.getOpcode()) {
6846   case ISD::OR: {
6847     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6848     if (!LHS)
6849       return None;
6850     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6851     if (!RHS)
6852       return None;
6853 
6854     if (LHS->isConstantZero())
6855       return RHS;
6856     if (RHS->isConstantZero())
6857       return LHS;
6858     return None;
6859   }
6860   case ISD::SHL: {
6861     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6862     if (!ShiftOp)
6863       return None;
6864 
6865     uint64_t BitShift = ShiftOp->getZExtValue();
6866     if (BitShift % 8 != 0)
6867       return None;
6868     uint64_t ByteShift = BitShift / 8;
6869 
6870     return Index < ByteShift
6871                ? ByteProvider::getConstantZero()
6872                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6873                                        Depth + 1);
6874   }
6875   case ISD::ANY_EXTEND:
6876   case ISD::SIGN_EXTEND:
6877   case ISD::ZERO_EXTEND: {
6878     SDValue NarrowOp = Op->getOperand(0);
6879     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6880     if (NarrowBitWidth % 8 != 0)
6881       return None;
6882     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6883 
6884     if (Index >= NarrowByteWidth)
6885       return Op.getOpcode() == ISD::ZERO_EXTEND
6886                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6887                  : None;
6888     return calculateByteProvider(NarrowOp, Index, Depth + 1);
6889   }
6890   case ISD::BSWAP:
6891     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
6892                                  Depth + 1);
6893   case ISD::LOAD: {
6894     auto L = cast<LoadSDNode>(Op.getNode());
6895     if (!L->isSimple() || L->isIndexed())
6896       return None;
6897 
6898     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
6899     if (NarrowBitWidth % 8 != 0)
6900       return None;
6901     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6902 
6903     if (Index >= NarrowByteWidth)
6904       return L->getExtensionType() == ISD::ZEXTLOAD
6905                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6906                  : None;
6907     return ByteProvider::getMemory(L, Index);
6908   }
6909   }
6910 
6911   return None;
6912 }
6913 
// Map byte index \p i to its memory position within a little-endian value of
// \p BW bytes. Little endian places byte i at offset i, so this is the
// identity mapping.
static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
  (void)BW; // The width does not affect little-endian byte positions.
  return i;
}
6917 
// Map byte index \p i to its memory position within a big-endian value of
// \p BW bytes. Big endian places the highest-numbered byte first, so the
// positions are reversed relative to little endian.
static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
  return BW - (i + 1);
}
6921 
6922 // Check if the bytes offsets we are looking at match with either big or
6923 // little endian value loaded. Return true for big endian, false for little
6924 // endian, and None if match failed.
6925 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
6926                                   int64_t FirstOffset) {
6927   // The endian can be decided only when it is 2 bytes at least.
6928   unsigned Width = ByteOffsets.size();
6929   if (Width < 2)
6930     return None;
6931 
6932   bool BigEndian = true, LittleEndian = true;
6933   for (unsigned i = 0; i < Width; i++) {
6934     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6935     LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
6936     BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
6937     if (!BigEndian && !LittleEndian)
6938       return None;
6939   }
6940 
6941   assert((BigEndian != LittleEndian) && "It should be either big endian or"
6942                                         "little endian");
6943   return BigEndian;
6944 }
6945 
6946 static SDValue stripTruncAndExt(SDValue Value) {
6947   switch (Value.getOpcode()) {
6948   case ISD::TRUNCATE:
6949   case ISD::ZERO_EXTEND:
6950   case ISD::SIGN_EXTEND:
6951   case ISD::ANY_EXTEND:
6952     return stripTruncAndExt(Value.getOperand(0));
6953   }
6954   return Value;
6955 }
6956 
/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the targets
/// supports it.
///
/// Assuming little endian target:
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 0) & 0xFF;
///  p[1] = (val >> 8) & 0xFF;
///  p[2] = (val >> 16) & 0xFF;
///  p[3] = (val >> 24) & 0xFF;
/// =>
///  *((i32)p) = val;
///
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 24) & 0xFF;
///  p[1] = (val >> 16) & 0xFF;
///  p[2] = (val >> 8) & 0xFF;
///  p[3] = (val >> 0) & 0xFF;
/// =>
///  *((i32)p) = BSWAP(val);
SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
  // The matching looks for "store (trunc x)" patterns that appear early but are
  // likely to be replaced by truncating store nodes during combining.
  // TODO: If there is evidence that running this later would help, this
  //       limitation could be removed. Legality checks may need to be added
  //       for the created store and optional bswap/rotate.
  if (LegalOperations)
    return SDValue();

  // Collect all the stores in the chain.
  // Walk up the token chain starting at N; each candidate store must be
  // directly chained to the next one. Chain ends up as the chain operand of
  // the last store collected and becomes the chain of the merged store.
  SDValue Chain;
  SmallVector<StoreSDNode *, 8> Stores;
  for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
    // TODO: Allow unordered atomics when wider type is legal (see D66309)
    EVT MemVT = Store->getMemoryVT();
    if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
        !Store->isSimple() || Store->isIndexed())
      return SDValue();
    Stores.push_back(Store);
    Chain = Store->getChain();
  }
  // There is no reason to continue if we do not have at least a pair of stores.
  if (Stores.size() < 2)
    return SDValue();

  // Handle simple types only.
  LLVMContext &Context = *DAG.getContext();
  unsigned NumStores = Stores.size();
  unsigned NarrowNumBits = N->getMemoryVT().getSizeInBits();
  unsigned WideNumBits = NumStores * NarrowNumBits;
  EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
  if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
    return SDValue();

  // Check if all bytes of the source value that we are looking at are stored
  // to the same base address. Collect offsets from Base address into OffsetMap.
  // OffsetMap[i] holds the in-memory byte offset of the store that writes the
  // i-th narrow piece of the wide value; INT64_MAX marks "not seen yet".
  SDValue SourceValue;
  SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
  int64_t FirstOffset = INT64_MAX;
  StoreSDNode *FirstStore = nullptr;
  Optional<BaseIndexOffset> Base;
  for (auto Store : Stores) {
    // All the stores store different parts of the CombinedValue. A truncate is
    // required to get the partial value.
    SDValue Trunc = Store->getValue();
    if (Trunc.getOpcode() != ISD::TRUNCATE)
      return SDValue();
    // Other than the first/last part, a shift operation is required to get the
    // offset.
    int64_t Offset = 0;
    SDValue WideVal = Trunc.getOperand(0);
    if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
        isa<ConstantSDNode>(WideVal.getOperand(1))) {
      // The shift amount must be a constant multiple of the narrow type.
      // It is translated to the offset address in the wide source value "y".
      //
      // x = srl y, ShiftAmtC
      // i8 z = trunc x
      // store z, ...
      uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
      if (ShiftAmtC % NarrowNumBits != 0)
        return SDValue();

      Offset = ShiftAmtC / NarrowNumBits;
      WideVal = WideVal.getOperand(0);
    }

    // Stores must share the same source value with different offsets.
    // Truncate and extends should be stripped to get the single source value.
    if (!SourceValue)
      SourceValue = WideVal;
    else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
      return SDValue();
    else if (SourceValue.getValueType() != WideVT) {
      // Prefer the widest of the equivalent source values so the final
      // truncate (if any) is taken from a value at least WideVT wide.
      if (WideVal.getValueType() == WideVT ||
          WideVal.getValueSizeInBits() > SourceValue.getValueSizeInBits())
        SourceValue = WideVal;
      // Give up if the source value type is smaller than the store size.
      if (SourceValue.getValueSizeInBits() < WideVT.getSizeInBits())
        return SDValue();
    }

    // Stores must share the same base address.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Remember the first store.
    if (ByteOffsetFromBase < FirstOffset) {
      FirstStore = Store;
      FirstOffset = ByteOffsetFromBase;
    }
    // Map the offset in the store and the offset in the combined value, and
    // early return if it has been set before.
    if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
      return SDValue();
    OffsetMap[Offset] = ByteOffsetFromBase;
  }

  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
  assert(FirstStore && "First store must be set");

  // Check that a store of the wide type is both allowed and fast on the target
  const DataLayout &Layout = DAG.getDataLayout();
  bool Fast = false;
  bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
                                        *FirstStore->getMemOperand(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  // Check if the pieces of the value are going to the expected places in memory
  // to merge the stores.
  auto checkOffsets = [&](bool MatchLittleEndian) {
    if (MatchLittleEndian) {
      // Piece i must land at memory offset i (scaled by the narrow width).
      for (unsigned i = 0; i != NumStores; ++i)
        if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
          return false;
    } else { // MatchBigEndian by reversing loop counter.
      for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
        if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
          return false;
    }
    return true;
  };

  // Check if the offsets line up for the native data layout of this target.
  bool NeedBswap = false;
  bool NeedRotate = false;
  if (!checkOffsets(Layout.isLittleEndian())) {
    // Special-case: check if byte offsets line up for the opposite endian.
    // Byte-sized pieces reversed == bswap; two half-width pieces swapped ==
    // rotate by half the width.
    if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
      NeedBswap = true;
    else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
      NeedRotate = true;
    else
      return SDValue();
  }

  SDLoc DL(N);
  if (WideVT != SourceValue.getValueType()) {
    assert(SourceValue.getValueType().getSizeInBits() > WideNumBits &&
           "Unexpected store value to merge");
    SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
  }

  // Before legalize we can introduce illegal bswaps/rotates which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // store and byte shuffling instead of several stores and byte shuffling.
  if (NeedBswap) {
    SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
  } else if (NeedRotate) {
    assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
    SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
    SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
  }

  SDValue NewStore =
      DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
                   FirstStore->getPointerInfo(), FirstStore->getAlign());

  // Rely on other DAG combine rules to remove the other individual stores.
  DAG.ReplaceAllUsesWith(N, NewStore.getNode());
  return NewStore;
}
7146 
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the targets supports it.
///
/// Assuming little endian target:
///  i8 *a = ...
///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
/// =>
///  i32 val = *((i32)a)
///
///  i8 *a = ...
///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
/// =>
///  i32 val = BSWAP(*((i32)a))
///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads) its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node. For example, once the address
/// of t28 load is reassociated load combine won't be triggered:
///             t25: i32 = add t4, Constant:i32<2>
///           t26: i64 = sign_extend t25
///        t27: i64 = add t2, t26
///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
///     t29: i32 = zero_extend t28
///   t32: i32 = shl t29, Constant:i8<8>
/// t33: i32 = or t23, t32
/// As a possible fix visitLoad can check if the load can be a part of a load
/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
  assert(N->getOpcode() == ISD::OR &&
         "Can only match load combining against OR nodes");

  // Handles simple types only
  EVT VT = N->getValueType(0);
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();
  unsigned ByteWidth = VT.getSizeInBits() / 8;

  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
  // Translate a ByteProvider into the in-memory byte offset within its load,
  // accounting for the target's endianness.
  auto MemoryByteOffset = [&] (ByteProvider P) {
    assert(P.isMemory() && "Must be a memory byte provider");
    unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes not bit");
    unsigned LoadByteWidth = LoadBitWidth / 8;
    return IsBigEndianTarget
            ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
            : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
  };

  Optional<BaseIndexOffset> Base;
  SDValue Chain;

  SmallPtrSet<LoadSDNode *, 8> Loads;
  Optional<ByteProvider> FirstByteProvider;
  int64_t FirstOffset = INT64_MAX;

  // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect bytes offsets from Base address in ByteOffsets.
  SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
  unsigned ZeroExtendedBytes = 0;
  // Iterate from the most significant byte down so a run of constant-zero
  // high bytes (a potential zero-extension) is seen first.
  for (int i = ByteWidth - 1; i >= 0; --i) {
    auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
    if (!P)
      return SDValue();

    if (P->isConstantZero()) {
      // It's OK for the N most significant bytes to be 0, we can just
      // zero-extend the load.
      // Zero bytes are only accepted as a contiguous run at the top; any
      // other zero byte fails the match.
      if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
        return SDValue();
      continue;
    }
    assert(P->isMemory() && "provenance should either be memory or zero");

    LoadSDNode *L = P->Load;
    assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
           !L->isIndexed() &&
           "Must be enforced by calculateByteProvider");
    assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");

    // All loads must share the same chain
    SDValue LChain = L->getChain();
    if (!Chain)
      Chain = LChain;
    else if (Chain != LChain)
      return SDValue();

    // Loads must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Calculate the offset of the current byte from the base address
    ByteOffsetFromBase += MemoryByteOffset(*P);
    ByteOffsets[i] = ByteOffsetFromBase;

    // Remember the first byte load
    if (ByteOffsetFromBase < FirstOffset) {
      FirstByteProvider = P;
      FirstOffset = ByteOffsetFromBase;
    }

    Loads.insert(L);
  }
  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
         "memory, so there must be at least one load which produces the value");
  assert(Base && "Base address of the accessed memory location must be set");
  assert(FirstOffset != INT64_MAX && "First byte offset must be set");

  bool NeedsZext = ZeroExtendedBytes > 0;

  // The memory type covers only the non-zero bytes; zero bytes are handled by
  // zero-extending the load.
  EVT MemVT =
      EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);

  if (!MemVT.isSimple())
    return SDValue();

  // Before legalize we can introduce too wide illegal loads which will be later
  // split into legal sized loads. This enables us to combine i64 load by i8
  // patterns to a couple of i32 loads on 32 bit targets.
  if (LegalOperations &&
      !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
                            MemVT))
    return SDValue();

  // Check if the bytes of the OR we are looking at match with either big or
  // little endian value load
  Optional<bool> IsBigEndian = isBigEndian(
      makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
  if (!IsBigEndian.hasValue())
    return SDValue();

  assert(FirstByteProvider && "must be set");

  // Ensure that the first byte is loaded from zero offset of the first load.
  // So the combined value can be loaded from the first load address.
  if (MemoryByteOffset(*FirstByteProvider) != 0)
    return SDValue();
  LoadSDNode *FirstLoad = FirstByteProvider->Load;

  // The node we are looking at matches with the pattern, check if we can
  // replace it with a single (possibly zero-extended) load and bswap + shift if
  // needed.

  // If the load needs byte swap check if the target supports it
  bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // load and byte shuffling instead of several loads and byte shuffling.
  // We do not introduce illegal bswaps when zero-extending as this tends to
  // introduce too many arithmetic instructions.
  if (NeedsBswap && (LegalOperations || NeedsZext) &&
      !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // If we need to bswap and zero extend, we have to insert a shift. Check that
  // it is legal.
  if (NeedsBswap && NeedsZext && LegalOperations &&
      !TLI.isOperationLegal(ISD::SHL, VT))
    return SDValue();

  // Check that a load of the wide type is both allowed and fast on the target
  bool Fast = false;
  bool Allowed =
      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
                             *FirstLoad->getMemOperand(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  SDValue NewLoad =
      DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
                     Chain, FirstLoad->getBasePtr(),
                     FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());

  // Transfer chain users from old loads to the new load.
  for (LoadSDNode *L : Loads)
    DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));

  if (!NeedsBswap)
    return NewLoad;

  // When zero-extending, shift the loaded bytes up first so the subsequent
  // bswap puts them into the low part of the result.
  SDValue ShiftedLoad =
      NeedsZext
          ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
                        DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
                                                   SDLoc(N), LegalOperations))
          : NewLoad;
  return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
}
7343 
// If the target has andn, bsl, or a similar bit-select instruction,
// we want to unfold masked merge, with canonical pattern of:
//   |        A  |  |B|
//   ((x ^ y) & m) ^ y
//    |  D  |
// Into:
//   (x & m) | (y & ~m)
// If y is a constant, and the 'andn' does not work with immediates,
// we unfold into a different pattern:
//   ~(~x & m) & (m | y)
// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
//       the very least that breaks andnpd / andnps patterns, and because those
//       patterns are simplified in IR and shouldn't be created in the DAG
SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
  assert(N->getOpcode() == ISD::XOR);

  // Don't touch 'not' (i.e. where y = -1).
  if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // There are 3 commutable operators in the pattern,
  // so we have to deal with 8 possible variants of the basic pattern.
  SDValue X, Y, M;
  // Try to match "And" as ((X ^ Other) & M), with the XOR at operand XorIdx of
  // the AND. On success, bind X, Y (== Other) and M.
  auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
    if (And.getOpcode() != ISD::AND || !And.hasOneUse())
      return false;
    SDValue Xor = And.getOperand(XorIdx);
    if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
      return false;
    SDValue Xor0 = Xor.getOperand(0);
    SDValue Xor1 = Xor.getOperand(1);
    // Don't touch 'not' (i.e. where y = -1).
    if (isAllOnesOrAllOnesSplat(Xor1))
      return false;
    // Canonicalize so that Other (the value XORed back in by N) is Xor1.
    if (Other == Xor0)
      std::swap(Xor0, Xor1);
    if (Other != Xor1)
      return false;
    X = Xor0;
    Y = Xor1;
    // The mask is the AND operand that is not the XOR.
    M = And.getOperand(XorIdx ? 0 : 1);
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
      !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
    return SDValue();

  // Don't do anything if the mask is constant. This should not be reachable.
  // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it, too.
  if (isa<ConstantSDNode>(M.getNode()))
    return SDValue();

  // We can transform if the target has AndNot
  if (!TLI.hasAndNot(M))
    return SDValue();

  SDLoc DL(N);

  // If Y is a constant, check that 'andn' works with immediates.
  if (!TLI.hasAndNot(Y)) {
    assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
    // If not, we need to do a bit more work to make sure andn is still used.
    // Emit the alternative form ~(~x & m) & (m | y), which keeps the
    // non-constant X on the 'andn' side.
    SDValue NotX = DAG.getNOT(DL, X, VT);
    SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
    SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
    SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
    return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
  }

  // Emit the canonical unfolded form (x & m) | (y & ~m).
  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
  SDValue NotM = DAG.getNOT(DL, M, VT);
  SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);

  return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
}
7425 
/// Combine an ISD::XOR node. Tries a series of algebraic folds (constant
/// folding, canonicalization, setcc inversion, not/neg idioms, abs matching,
/// masked-merge unfolding) and returns the replacement value, or an empty
/// SDValue if no fold applied.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  SDLoc DL(N);
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // fold (xor c1, c2) -> c1^c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  // Handles plain SETCC, SELECT_CC, and the strict FP setcc variants; the
  // inverted condition must be legal (or we must be pre-legalization).
  unsigned N0Opcode = N0.getOpcode();
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) &&
      isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               LHS.getValueType());
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0Opcode) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      case ISD::STRICT_FSETCC:
      case ISD::STRICT_FSETCCS: {
        if (N0.hasOneUse()) {
          // FIXME Can we handle multiple uses? Could we token factor the chain
          // results from the new/old setcc?
          SDValue SetCC =
              DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
                           N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
          CombineTo(N, SetCC);
          // Also rewire the chain result of the old strict setcc to the new
          // node before deleting it.
          DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
          recursivelyDeleteUnusedNodes(N0.getNode());
          return SDValue(N, 0); // Return N so it doesn't get rechecked!
        }
        break;
      }
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL0(N0);
    V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
                    DAG.getConstant(1, DL0, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
    if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
      // De Morgan: swap AND<->OR and invert both operands.
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
      N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
      AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
      return DAG.getNode(NewOpcode, DL, VT, N00, N01);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
    if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
      N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
      AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
      return DAG.getNode(NewOpcode, DL, VT, N00, N01);
    }
  }

  // fold (not (neg x)) -> (add X, -1)
  // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
  // Y is a constant or the subtract has a single use.
  if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
      isNullConstant(N0.getOperand(0))) {
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
                       DAG.getAllOnesConstant(DL, VT));
  }

  // fold (not (add X, -1)) -> (neg X)
  if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
      isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                       N0.getOperand(0));
  }

  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
    SDValue X = N0.getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
  }

  // fold (xor (shift x, C), shifted-all-ones) -> (shift (not x), C):
  // if the xor constant is exactly the all-ones mask produced by the shift,
  // we can invert before the shift instead.
  if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
    ConstantSDNode *XorC = isConstOrConstSplat(N1);
    ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
    unsigned BitWidth = VT.getScalarSizeInBits();
    if (XorC && ShiftC) {
      // Don't crash on an oversized shift. We can not guarantee that a bogus
      // shift has been simplified to undef.
      uint64_t ShiftAmt = ShiftC->getLimitedValue();
      if (ShiftAmt < BitWidth) {
        APInt Ones = APInt::getAllOnesValue(BitWidth);
        Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
        if (XorC->getAPIntValue() == Ones) {
          // If the xor constant is a shifted -1, do a 'not' before the shift:
          // xor (X << ShiftC), XorC --> (not X) << ShiftC
          // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
          SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
          return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
        }
      }
    }
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
    SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
    if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
      SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
      SDValue S0 = S.getOperand(0);
      if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        // The sra amount must be the sign-bit index for this to be abs().
        if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, DL, VT, S0);
      }
    }
  }

  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0Opcode == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
  if (SDValue MM = unfoldMaskedMerge(N))
    return MM;

  // Simplify the expression using non-local knowledge.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
    return Combined;

  return SDValue();
}
7654 
7655 /// If we have a shift-by-constant of a bitwise logic op that itself has a
7656 /// shift-by-constant operand with identical opcode, we may be able to convert
7657 /// that into 2 independent shifts followed by the logic op. This is a
7658 /// throughput improvement.
7659 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7660   // Match a one-use bitwise logic op.
7661   SDValue LogicOp = Shift->getOperand(0);
7662   if (!LogicOp.hasOneUse())
7663     return SDValue();
7664 
7665   unsigned LogicOpcode = LogicOp.getOpcode();
7666   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
7667       LogicOpcode != ISD::XOR)
7668     return SDValue();
7669 
7670   // Find a matching one-use shift by constant.
7671   unsigned ShiftOpcode = Shift->getOpcode();
7672   SDValue C1 = Shift->getOperand(1);
7673   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
7674   assert(C1Node && "Expected a shift with constant operand");
7675   const APInt &C1Val = C1Node->getAPIntValue();
7676   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
7677                              const APInt *&ShiftAmtVal) {
7678     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
7679       return false;
7680 
7681     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
7682     if (!ShiftCNode)
7683       return false;
7684 
7685     // Capture the shifted operand and shift amount value.
7686     ShiftOp = V.getOperand(0);
7687     ShiftAmtVal = &ShiftCNode->getAPIntValue();
7688 
7689     // Shift amount types do not have to match their operand type, so check that
7690     // the constants are the same width.
7691     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
7692       return false;
7693 
7694     // The fold is not valid if the sum of the shift values exceeds bitwidth.
7695     if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
7696       return false;
7697 
7698     return true;
7699   };
7700 
7701   // Logic ops are commutative, so check each operand for a match.
7702   SDValue X, Y;
7703   const APInt *C0Val;
7704   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
7705     Y = LogicOp.getOperand(1);
7706   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
7707     Y = LogicOp.getOperand(0);
7708   else
7709     return SDValue();
7710 
7711   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
7712   SDLoc DL(Shift);
7713   EVT VT = Shift->getValueType(0);
7714   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
7715   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
7716   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
7717   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
7718   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
7719 }
7720 
7721 /// Handle transforms common to the three shifts, when the shift amount is a
7722 /// constant.
7723 /// We are looking for: (shift being one of shl/sra/srl)
7724 ///   shift (binop X, C0), C1
7725 /// And want to transform into:
7726 ///   binop (shift X, C1), (shift C0, C1)
7727 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
7728   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
7729 
7730   // Do not turn a 'not' into a regular xor.
7731   if (isBitwiseNot(N->getOperand(0)))
7732     return SDValue();
7733 
7734   // The inner binop must be one-use, since we want to replace it.
7735   SDValue LHS = N->getOperand(0);
7736   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
7737     return SDValue();
7738 
7739   // TODO: This is limited to early combining because it may reveal regressions
7740   //       otherwise. But since we just checked a target hook to see if this is
7741   //       desirable, that should have filtered out cases where this interferes
7742   //       with some other pattern matching.
7743   if (!LegalTypes)
7744     if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
7745       return R;
7746 
7747   // We want to pull some binops through shifts, so that we have (and (shift))
7748   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
7749   // thing happens with address calculations, so it's important to canonicalize
7750   // it.
7751   switch (LHS.getOpcode()) {
7752   default:
7753     return SDValue();
7754   case ISD::OR:
7755   case ISD::XOR:
7756   case ISD::AND:
7757     break;
7758   case ISD::ADD:
7759     if (N->getOpcode() != ISD::SHL)
7760       return SDValue(); // only shl(add) not sr[al](add).
7761     break;
7762   }
7763 
7764   // We require the RHS of the binop to be a constant and not opaque as well.
7765   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
7766   if (!BinOpCst)
7767     return SDValue();
7768 
7769   // FIXME: disable this unless the input to the binop is a shift by a constant
7770   // or is copy/select. Enable this in other cases when figure out it's exactly
7771   // profitable.
7772   SDValue BinOpLHSVal = LHS.getOperand(0);
7773   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
7774                             BinOpLHSVal.getOpcode() == ISD::SRA ||
7775                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
7776                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
7777   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
7778                         BinOpLHSVal.getOpcode() == ISD::SELECT;
7779 
7780   if (!IsShiftByConstant && !IsCopyOrSelect)
7781     return SDValue();
7782 
7783   if (IsCopyOrSelect && N->hasOneUse())
7784     return SDValue();
7785 
7786   // Fold the constants, shifting the binop RHS by the shift amount.
7787   SDLoc DL(N);
7788   EVT VT = N->getValueType(0);
7789   SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
7790                                N->getOperand(1));
7791   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
7792 
7793   SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
7794                                  N->getOperand(1));
7795   return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
7796 }
7797 
7798 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
7799   assert(N->getOpcode() == ISD::TRUNCATE);
7800   assert(N->getOperand(0).getOpcode() == ISD::AND);
7801 
7802   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
7803   EVT TruncVT = N->getValueType(0);
7804   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
7805       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
7806     SDValue N01 = N->getOperand(0).getOperand(1);
7807     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
7808       SDLoc DL(N);
7809       SDValue N00 = N->getOperand(0).getOperand(0);
7810       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
7811       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
7812       AddToWorklist(Trunc00.getNode());
7813       AddToWorklist(Trunc01.getNode());
7814       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
7815     }
7816   }
7817 
7818   return SDValue();
7819 }
7820 
/// Combine an ISD::ROTL / ISD::ROTR node. Handles zero/modulo rotate amounts,
/// rotate-amount normalization, rot->bswap, demanded bits, truncated AND
/// amounts, and merging of nested rotates. Returns the replacement value, or
/// an empty SDValue if no fold applied.
SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullOrNullSplat(N1))
    return N0;

  // fold (rot x, c) -> x iff (c % BitSize) == 0
  if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
    // For power-of-2 bitwidths, c % Bitsize == 0 iff the low log2(Bitsize)
    // bits of the amount are all known zero.
    APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
    if (DAG.MaskedValueIsZero(N1, ModuloMask))
      return N0;
  }

  // fold (rot x, c) -> (rot x, c % BitSize)
  bool OutOfRange = false;
  auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
    OutOfRange |= C->getAPIntValue().uge(Bitsize);
    return true;
  };
  if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
    EVT AmtVT = N1.getValueType();
    SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
    if (SDValue Amt =
            DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
  }

  // rot i16 X, 8 --> bswap X
  auto *RotAmtC = isConstOrConstSplat(N1);
  if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
      VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
    return DAG.getNode(ISD::BSWAP, dl, VT, N0);

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
      EVT ShiftVT = C1->getValueType(0);
      // Same-direction rotates add their amounts; opposite directions
      // subtract.
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
              CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
        // NOTE(review): assumes SREM constant folding always succeeds here —
        // both operands are constants of the same type; confirm.
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
            ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                           CombinedShiftNorm);
      }
    }
  }
  return SDValue();
}
7891 
/// Combine an ISD::SHL node. Applies generic shift simplification, constant
/// folding, shift-amount merging for nested shifts (also through extends),
/// shl/shift-right pair folds, and canonicalization of shl over add/or/mul.
/// Returns the replacement value, or an empty SDValue if no fold applied.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  EVT ShiftVT = N1.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C =
                  DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  // Uniform constant (or constant splat) shift amount, if any.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
    return C;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    // If c1 + c2 reaches the bitwidth, every bit is shifted out -> 0.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::ANY_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    SDValue InnerShiftAmt = N0Op0.getOperand(1);
    EVT InnerVT = N0Op0.getValueType();
    uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();

    auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
                                                         ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return c2.uge(OpSizeInBits - InnerBitwidth) &&
             (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
                                                      ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return c2.uge(OpSizeInBits - InnerBitwidth) &&
             (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDLoc DL(N);
      SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
      // The inner amount may be a different type; widen/truncate it to the
      // outer shift-amount type before adding.
      SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
      Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
      return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    SDValue InnerShiftAmt = N0Op0.getOperand(1);

    auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2);
      return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDLoc DL(N);
      EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
      SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
      NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
      AddToWorklist(NewSHL.getNode());
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
  // TODO - support non-uniform vector shift amounts.
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, ShiftVT));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, ShiftVT));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  // TODO - drop hasOneUse requirement if c1 == c2?
  // TODO - support non-uniform vector shift amounts.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
      TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
        uint64_t c1 = N0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        // Mask covers the bits surviving the srl; align it with the net shift.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, ShiftVT));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, ShiftVT));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
      TLI.isDesirableToCommuteWithShift(N, Level)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    // Only commit if the shifted constant actually folded to a constant.
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  // Generic shift-of-binop canonicalization for non-opaque constant amounts.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N))
      return NewSHL;

  // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
  if (N0.getOpcode() == ISD::VSCALE)
    if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
      auto DL = SDLoc(N);
      APInt C0 = N0.getConstantOperandAPInt(0);
      APInt C1 = NC1->getAPIntValue();
      return DAG.getVScale(DL, VT, C0 << C1);
    }

  return SDValue();
}
8147 
// Transform a right shift of a multiply into a multiply-high.
// Examples:
// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
                                  const TargetLowering &TLI) {
  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");

  // Check the shift amount. Proceed with the transformation if the shift
  // amount is constant.
  ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
  if (!ShiftAmtSrc)
    return SDValue();

  SDLoc DL(N);

  // The operation feeding into the shift must be a multiply.
  SDValue ShiftOperand = N->getOperand(0);
  if (ShiftOperand.getOpcode() != ISD::MUL)
    return SDValue();

  // Both operands must be equivalent extend nodes.
  SDValue LeftOp = ShiftOperand.getOperand(0);
  SDValue RightOp = ShiftOperand.getOperand(1);
  bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
  bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;

  // Bail out unless both multiply operands are the same kind of extension.
  if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
    return SDValue();

  EVT WideVT1 = LeftOp.getValueType();
  EVT WideVT2 = RightOp.getValueType();
  (void)WideVT2;
  // Proceed with the transformation if the wide types match.
  assert((WideVT1 == WideVT2) &&
         "Cannot have a multiply node with two different operand types.");

  EVT NarrowVT = LeftOp.getOperand(0).getValueType();
  // Check that the two extend nodes are the same type.
  if (NarrowVT !=  RightOp.getOperand(0).getValueType())
    return SDValue();

  // Only transform into mulh if mulh for the narrow type is cheaper than
  // a multiply followed by a shift. This should also check if mulh is
  // legal for NarrowVT on the target.
  if (!TLI.isMulhCheaperThanMulShift(NarrowVT))
      return SDValue();

  // Proceed with the transformation if the wide type is twice as large
  // as the narrow type.
  unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
  if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
    return SDValue();

  // Check the shift amount with the narrow type size.
  // Proceed with the transformation if the shift amount is the width
  // of the narrow type.
  unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
  if (ShiftAmt != NarrowVTSize)
    return SDValue();

  // If the operation feeding into the MUL is a sign extend (sext),
  // we use mulhs. Otherwise, zero extends (zext) use mulhu.
  unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;

  // The MULH result is in the narrow type; extend it back out to the wide
  // result type of the original shift (sext for SRA, zext for SRL).
  SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
                               RightOp.getOperand(0));
  return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
                                     : DAG.getZExtOrTrunc(Result, DL, WideVT1));
}
8219 
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> c1 >> c2 (arithmetic, constant-folded)
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
    return C;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if (!LegalOperations ||
        TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
        TargetLowering::Legal)
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  // clamp (add c1, c2) to max shift.
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();
    EVT ShiftSVT = ShiftVT.getScalarType();
    SmallVector<SDValue, 16> ShiftValues;

    auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      // Widen by one bit so the addition below cannot overflow.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      APInt Sum = c1 + c2;
      // An out-of-range sra amount is clamped to OpSizeInBits - 1, which
      // yields the same all-sign-bits result.
      unsigned ShiftSum =
          Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
      ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
      return true;
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
      SDValue ShiftValue;
      if (VT.isVector())
        ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
      else
        ShiftValue = ShiftValues[0];
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
  //   sra (add (shl X, N1C), AddC), N1C -->
  //   sext (add (trunc X to (width - N1C)), AddC')
  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
      N0.getOperand(0).getOpcode() == ISD::SHL &&
      N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
    if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
      SDValue Shl = N0.getOperand(0);
      // Determine what the truncate's type would be and ask the target if that
      // is a free operation.
      LLVMContext &Ctx = *DAG.getContext();
      unsigned ShiftAmt = N1C->getZExtValue();
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // TODO: The simple type check probably belongs in the default hook
      //       implementation and/or target-specific overrides (because
      //       non-simple types likely require masking when legalized), but that
      //       restriction may conflict with other transforms.
      if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
        SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
                             trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
        SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
        return DAG.getSExtOrTrunc(Add, DL, VT);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  // TODO - support non-uniform vector shift amounts.
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      EVT LargeVT = N0Op0.getValueType();
      unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
      if (LargeShift->getAPIntValue() == TruncBits) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
                                      getShiftAmountTy(LargeVT));
        SDValue SRA =
            DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N))
      return NewSRA;

  // Try to transform this shift into a multiply-high if
  // it matches the appropriate pattern detected in combineShiftToMULH.
  if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
    return MULH;

  return SDValue();
}
8414 
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
    return C;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    // The combined shift count is out of range: the result is zero.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      // Widen by one bit so the addition below cannot overflow.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    // The combined shift count stays in range: merge into a single srl.
    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue InnerShift = N0.getOperand(0);
    // TODO - support non-uniform vector shift amounts.
    if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = InnerShift.getValueType();
      EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N);
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
        SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       InnerShift.getOperand(0), NewShiftAmt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
      }
      // In the more general case, we can clear the high bits after the shift:
      // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
      if (N0.hasOneUse() && InnerShift.hasOneUse() &&
          c1 + c2 < InnerShiftSize) {
        SDLoc DL(N);
        SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
        SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       InnerShift.getOperand(0), NewShiftAmt);
        SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
                                                            OpSizeInBits - c2),
                                       DL, InnerShiftVT);
        SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  // TODO - (srl (shl x, c1), c2).
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  // TODO - support non-uniform vector shift amounts.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getAPIntValue().uge(BitSize))
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, DL0,
                                          getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  // Try to transform this shift into a multiply-high if
  // it matches the appropriate pattern detected in combineShiftToMULH.
  if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
    return MULH;

  return SDValue();
}
8644 
SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  bool IsFSHL = N->getOpcode() == ISD::FSHL;
  unsigned BitWidth = VT.getScalarSizeInBits();

  // fold (fshl N0, N1, 0) -> N0
  // fold (fshr N0, N1, 0) -> N1
  if (isPowerOf2_32(BitWidth))
    if (DAG.MaskedValueIsZero(
            N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
      return IsFSHL ? N0 : N1;

  auto IsUndefOrZero = [](SDValue V) {
    return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
  };

  // TODO - support non-uniform vector shift amounts.
  if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
    EVT ShAmtTy = N2.getValueType();

    // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
    if (Cst->getAPIntValue().uge(BitWidth)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
      return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
                         DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
    }

    unsigned ShAmt = Cst->getZExtValue();
    if (ShAmt == 0)
      return IsFSHL ? N0 : N1;

    // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
    // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
    // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
    // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
    if (IsUndefOrZero(N0))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
                         DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
                                         SDLoc(N), ShAmtTy));
    if (IsUndefOrZero(N1))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                         DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
                                         SDLoc(N), ShAmtTy));

    // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
    // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
    // TODO - bigendian support once we have test coverage.
    // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
    // TODO - permit LHS EXTLOAD if extensions are shifted out.
    if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
        !DAG.getDataLayout().isBigEndian()) {
      auto *LHS = dyn_cast<LoadSDNode>(N0);
      auto *RHS = dyn_cast<LoadSDNode>(N1);
      if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
          LHS->getAddressSpace() == RHS->getAddressSpace() &&
          (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
          ISD::isNON_EXTLoad(LHS)) {
        if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
          SDLoc DL(RHS);
          // Byte offset into the combined pair of loads that the funnel
          // shift selects.
          uint64_t PtrOff =
              IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
          Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
          bool Fast = false;
          if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                                     RHS->getAddressSpace(), NewAlign,
                                     RHS->getMemOperand()->getFlags(), &Fast) &&
              Fast) {
            SDValue NewPtr = DAG.getMemBasePlusOffset(
                RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
            AddToWorklist(NewPtr.getNode());
            SDValue Load = DAG.getLoad(
                VT, DL, RHS->getChain(), NewPtr,
                RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
            // Replace the old load's chain with the new load's chain.
            WorklistRemover DeadNodes(*this);
            DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
            return Load;
          }
        }
      }
    }
  }

  // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
  // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff we know the shift amount is in range.
  // TODO: when is it worth doing SUB(BW, N2) as well?
  if (isPowerOf2_32(BitWidth)) {
    APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
    if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
    if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
  }

  // fold (fshl N0, N0, N2) -> (rotl N0, N2)
  // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
  // is legal as well we might be better off avoiding non-constant (BW - N2).
  unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
  if (N0 == N1 && hasOperation(RotOpc, VT))
    return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);

  // Simplify, based on bits shifted out of N0/N1.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
8758 
8759 SDValue DAGCombiner::visitABS(SDNode *N) {
8760   SDValue N0 = N->getOperand(0);
8761   EVT VT = N->getValueType(0);
8762 
8763   // fold (abs c1) -> c2
8764   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8765     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
8766   // fold (abs (abs x)) -> (abs x)
8767   if (N0.getOpcode() == ISD::ABS)
8768     return N0;
8769   // fold (abs x) -> x iff not-negative
8770   if (DAG.SignBitIsZero(N0))
8771     return N0;
8772   return SDValue();
8773 }
8774 
8775 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
8776   SDValue N0 = N->getOperand(0);
8777   EVT VT = N->getValueType(0);
8778 
8779   // fold (bswap c1) -> c2
8780   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8781     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
8782   // fold (bswap (bswap x)) -> x
8783   if (N0.getOpcode() == ISD::BSWAP)
8784     return N0->getOperand(0);
8785   return SDValue();
8786 }
8787 
8788 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
8789   SDValue N0 = N->getOperand(0);
8790   EVT VT = N->getValueType(0);
8791 
8792   // fold (bitreverse c1) -> c2
8793   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8794     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
8795   // fold (bitreverse (bitreverse x)) -> x
8796   if (N0.getOpcode() == ISD::BITREVERSE)
8797     return N0.getOperand(0);
8798   return SDValue();
8799 }
8800 
8801 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
8802   SDValue N0 = N->getOperand(0);
8803   EVT VT = N->getValueType(0);
8804 
8805   // fold (ctlz c1) -> c2
8806   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8807     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
8808 
8809   // If the value is known never to be zero, switch to the undef version.
8810   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
8811     if (DAG.isKnownNeverZero(N0))
8812       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8813   }
8814 
8815   return SDValue();
8816 }
8817 
8818 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
8819   SDValue N0 = N->getOperand(0);
8820   EVT VT = N->getValueType(0);
8821 
8822   // fold (ctlz_zero_undef c1) -> c2
8823   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8824     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8825   return SDValue();
8826 }
8827 
8828 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
8829   SDValue N0 = N->getOperand(0);
8830   EVT VT = N->getValueType(0);
8831 
8832   // fold (cttz c1) -> c2
8833   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8834     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
8835 
8836   // If the value is known never to be zero, switch to the undef version.
8837   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
8838     if (DAG.isKnownNeverZero(N0))
8839       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8840   }
8841 
8842   return SDValue();
8843 }
8844 
8845 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
8846   SDValue N0 = N->getOperand(0);
8847   EVT VT = N->getValueType(0);
8848 
8849   // fold (cttz_zero_undef c1) -> c2
8850   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8851     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8852   return SDValue();
8853 }
8854 
8855 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
8856   SDValue N0 = N->getOperand(0);
8857   EVT VT = N->getValueType(0);
8858 
8859   // fold (ctpop c1) -> c2
8860   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8861     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
8862   return SDValue();
8863 }
8864 
8865 // FIXME: This should be checking for no signed zeros on individual operands, as
8866 // well as no nans.
8867 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
8868                                          SDValue RHS,
8869                                          const TargetLowering &TLI) {
8870   const TargetOptions &Options = DAG.getTarget().Options;
8871   EVT VT = LHS.getValueType();
8872 
8873   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
8874          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
8875          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
8876 }
8877 
/// Generate Min/Max node
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                   SDValue RHS, SDValue True, SDValue False,
                                   ISD::CondCode CC, const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  // The select must choose between exactly the two compared values.
  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
    return SDValue();

  // Note: the IEEE opcodes are checked for legality on VT itself, while the
  // plain FMINNUM/FMAXNUM opcodes are checked on the type VT legalizes to.
  EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  switch (CC) {
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE: {
    // Since it's known never nan to get here already, either fminnum or
    // fminnum_ieee are OK. Try the ieee version first, since fminnum is
    // expanded in terms of it.
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE: {
    // Greater-than compares mirror the less-than case with min/max swapped.
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  default:
    return SDValue();
  }
}
8925 
8926 /// If a (v)select has a condition value that is a sign-bit test, try to smear
8927 /// the condition operand sign-bit across the value width and use it as a mask.
8928 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
8929   SDValue Cond = N->getOperand(0);
8930   SDValue C1 = N->getOperand(1);
8931   SDValue C2 = N->getOperand(2);
8932   assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
8933          "Expected select-of-constants");
8934 
8935   EVT VT = N->getValueType(0);
8936   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
8937       VT != Cond.getOperand(0).getValueType())
8938     return SDValue();
8939 
8940   // The inverted-condition + commuted-select variants of these patterns are
8941   // canonicalized to these forms in IR.
8942   SDValue X = Cond.getOperand(0);
8943   SDValue CondC = Cond.getOperand(1);
8944   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
8945   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
8946       isAllOnesOrAllOnesSplat(C2)) {
8947     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
8948     SDLoc DL(N);
8949     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8950     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8951     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
8952   }
8953   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
8954     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
8955     SDLoc DL(N);
8956     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8957     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8958     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
8959   }
8960   return SDValue();
8961 }
8962 
/// Try to simplify a select of two integer constants into arithmetic/logic
/// (zext/sext/xor/add/shl) so that no select instruction is needed.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  if (!VT.isInteger())
    return SDValue();

  // Both select arms must be scalar integer constants.
  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C1 || !C2)
    return SDValue();

  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext). There
  // is also a target-independent combine here in DAGCombiner in the other
  // direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // Use a target hook because some targets may prefer to transform in the
    // other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      // For any constants that differ by 1, we can transform the select into an
      // extend and add.
      const APInt &C1Val = C1->getAPIntValue();
      const APInt &C2Val = C2->getAPIntValue();
      if (C1Val - 1 == C2Val) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1Val + 1 == C2Val) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }

      // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
      if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
        return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
      }

      // Last resort: smear a sign-bit-test condition into a mask.
      if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
        return V;
    }

    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}
9070 
/// Main combine entry point for scalar SELECT nodes: boolean-logic folds for
/// i1 selects, select-of-constants math, select-chain normalization,
/// min/max formation, and lowering to SELECT_CC.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);
  SDNodeFlags Flags = N->getFlags();

  // Generic simplifications shared with other select-like nodes.
  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or C, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
      // If getNode() found an existing node, it has uses; transform either way.
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2, Flags);
      // Cleanup on failure: the speculatively created inner select is unused.
      if (InnerSelect.use_empty())
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
                                        Cond1, N1, N2, Flags);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect, Flags);
      // Cleanup on failure.
      if (InnerSelect.use_empty())
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
                             N2, Flags);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2, Flags);
        }
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
                             N2_2, Flags);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2, Flags);
      }
    }
  }

  // select (not Cond), N1, N2 -> select Cond, N2, N1
  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
    SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
    SelectOp->setFlags(Flags);
    return SelectOp;
  }

  // Fold selects based on a setcc into other things, such as min/max/abs.
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

    // select (fcmp lt x, y), x, y -> fminnum x, y
    // select (fcmp gt x, y), x, y -> fmaxnum x, y
    //
    // This is OK if we don't care what happens if either operand is a NaN.
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
      if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
                                                CC, TLI, DAG))
        return FMinMax;

    // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
    // This is conservatively limited to pre-legal-operations to give targets
    // a chance to reverse the transform if they want to do that. Also, it is
    // unlikely that the pattern would be formed late, so it's probably not
    // worth going through the other checks.
    if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
        CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
        N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
      auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
      auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
      if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
        // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
        // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
        //
        // The IR equivalent of this transform would have this form:
        //   %a = add %x, C
        //   %c = icmp ugt %x, ~C
        //   %r = select %c, -1, %a
        //   =>
        //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
        //   %u0 = extractvalue %u, 0
        //   %u1 = extractvalue %u, 1
        //   %r = select %u1, -1, %u0
        SDVTList VTs = DAG.getVTList(VT, VT0);
        SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
        return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
      }
    }

    if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
        (!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
      // Any flags available in a select/setcc fold will be on the setcc as they
      // migrated from fcmp
      Flags = N0.getNode()->getFlags();
      SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
                                       N2, N0.getOperand(2));
      SelectNode->setFlags(Flags);
      return SelectNode;
    }

    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}
9257 
9258 // This function assumes all the vselect's arguments are CONCAT_VECTOR
9259 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
9260 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9261   SDLoc DL(N);
9262   SDValue Cond = N->getOperand(0);
9263   SDValue LHS = N->getOperand(1);
9264   SDValue RHS = N->getOperand(2);
9265   EVT VT = N->getValueType(0);
9266   int NumElems = VT.getVectorNumElements();
9267   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
9268          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
9269          Cond.getOpcode() == ISD::BUILD_VECTOR);
9270 
9271   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
9272   // binary ones here.
9273   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
9274     return SDValue();
9275 
9276   // We're sure we have an even number of elements due to the
9277   // concat_vectors we have as arguments to vselect.
9278   // Skip BV elements until we find one that's not an UNDEF
9279   // After we find an UNDEF element, keep looping until we get to half the
9280   // length of the BV and see if all the non-undef nodes are the same.
9281   ConstantSDNode *BottomHalf = nullptr;
9282   for (int i = 0; i < NumElems / 2; ++i) {
9283     if (Cond->getOperand(i)->isUndef())
9284       continue;
9285 
9286     if (BottomHalf == nullptr)
9287       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9288     else if (Cond->getOperand(i).getNode() != BottomHalf)
9289       return SDValue();
9290   }
9291 
9292   // Do the same for the second half of the BuildVector
9293   ConstantSDNode *TopHalf = nullptr;
9294   for (int i = NumElems / 2; i < NumElems; ++i) {
9295     if (Cond->getOperand(i)->isUndef())
9296       continue;
9297 
9298     if (TopHalf == nullptr)
9299       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9300     else if (Cond->getOperand(i).getNode() != TopHalf)
9301       return SDValue();
9302   }
9303 
9304   assert(TopHalf && BottomHalf &&
9305          "One half of the selector was all UNDEFs and the other was all the "
9306          "same value. This should have been addressed before this function.");
9307   return DAG.getNode(
9308       ISD::CONCAT_VECTORS, DL, VT,
9309       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
9310       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
9311 }
9312 
9313 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
9314   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
9315   SDValue Mask = MSC->getMask();
9316   SDValue Chain = MSC->getChain();
9317   SDLoc DL(N);
9318 
9319   // Zap scatters with a zero mask.
9320   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9321     return Chain;
9322 
9323   return SDValue();
9324 }
9325 
9326 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
9327   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
9328   SDValue Mask = MST->getMask();
9329   SDValue Chain = MST->getChain();
9330   SDLoc DL(N);
9331 
9332   // Zap masked stores with a zero mask.
9333   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9334     return Chain;
9335 
9336   // If this is a masked load with an all ones mask, we can use a unmasked load.
9337   // FIXME: Can we do this for indexed, compressing, or truncating stores?
9338   if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
9339       MST->isUnindexed() && !MST->isCompressingStore() &&
9340       !MST->isTruncatingStore())
9341     return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
9342                         MST->getBasePtr(), MST->getMemOperand());
9343 
9344   // Try transforming N to an indexed store.
9345   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9346     return SDValue(N, 0);
9347 
9348   return SDValue();
9349 }
9350 
9351 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
9352   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
9353   SDValue Mask = MGT->getMask();
9354   SDLoc DL(N);
9355 
9356   // Zap gathers with a zero mask.
9357   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9358     return CombineTo(N, MGT->getPassThru(), MGT->getChain());
9359 
9360   return SDValue();
9361 }
9362 
9363 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
9364   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
9365   SDValue Mask = MLD->getMask();
9366   SDLoc DL(N);
9367 
9368   // Zap masked loads with a zero mask.
9369   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9370     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
9371 
9372   // If this is a masked load with an all ones mask, we can use a unmasked load.
9373   // FIXME: Can we do this for indexed, expanding, or extending loads?
9374   if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
9375       MLD->isUnindexed() && !MLD->isExpandingLoad() &&
9376       MLD->getExtensionType() == ISD::NON_EXTLOAD) {
9377     SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
9378                                 MLD->getBasePtr(), MLD->getMemOperand());
9379     return CombineTo(N, NewLd, NewLd.getValue(1));
9380   }
9381 
9382   // Try transforming N to an indexed load.
9383   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9384     return SDValue(N, 0);
9385 
9386   return SDValue();
9387 }
9388 
9389 /// A vector select of 2 constant vectors can be simplified to math/logic to
9390 /// avoid a variable select instruction and possibly avoid constant loads.
9391 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
9392   SDValue Cond = N->getOperand(0);
9393   SDValue N1 = N->getOperand(1);
9394   SDValue N2 = N->getOperand(2);
9395   EVT VT = N->getValueType(0);
9396   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
9397       !TLI.convertSelectOfConstantsToMath(VT) ||
9398       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
9399       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
9400     return SDValue();
9401 
9402   // Check if we can use the condition value to increment/decrement a single
9403   // constant value. This simplifies a select to an add and removes a constant
9404   // load/materialization from the general case.
9405   bool AllAddOne = true;
9406   bool AllSubOne = true;
9407   unsigned Elts = VT.getVectorNumElements();
9408   for (unsigned i = 0; i != Elts; ++i) {
9409     SDValue N1Elt = N1.getOperand(i);
9410     SDValue N2Elt = N2.getOperand(i);
9411     if (N1Elt.isUndef() || N2Elt.isUndef())
9412       continue;
9413     if (N1Elt.getValueType() != N2Elt.getValueType())
9414       continue;
9415 
9416     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
9417     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
9418     if (C1 != C2 + 1)
9419       AllAddOne = false;
9420     if (C1 != C2 - 1)
9421       AllSubOne = false;
9422   }
9423 
9424   // Further simplifications for the extra-special cases where the constants are
9425   // all 0 or all -1 should be implemented as folds of these patterns.
9426   SDLoc DL(N);
9427   if (AllAddOne || AllSubOne) {
9428     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
9429     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
9430     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
9431     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
9432     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
9433   }
9434 
9435   // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
9436   APInt Pow2C;
9437   if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
9438       isNullOrNullSplat(N2)) {
9439     SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
9440     SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
9441     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
9442   }
9443 
9444   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9445     return V;
9446 
9447   // The general case for select-of-constants:
9448   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
9449   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
9450   // leave that to a machine-specific pass.
9451   return SDValue();
9452 }
9453 
/// Main combine entry point for vector selects: condition flip, integer abs
/// recognition, fmin/fmax formation, setcc widening, trivial all-ones/zeros
/// masks, concat folding, and select-of-constants math.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Generic simplifications shared with other select-like nodes.
  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
    return DAG.getSelect(DL, VT, F, N2, N1);

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // Match "X >= 0 ? X : -X" (or the > -1 form) and "X <= 0 ? -X : X",
    // where -X appears as (sub 0, X).
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      // Prefer a native ABS node; otherwise expand via sra/add/xor.
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
                                  DAG.getConstant(VT.getScalarSizeInBits() - 1,
                                                  DL, getShiftAmountTy(VT)));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }

    // vselect x, y (fcmp lt x, y) -> fminnum x, y
    // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
      if (SDValue FMinMax =
              combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
        return FMinMax;
    }

    // If this select has a condition (setcc) with narrower operands than the
    // select, try to widen the compare to match the select width.
    // TODO: This should be extended to handle any constant.
    // TODO: This could be extended to handle non-loading patterns, but that
    //       requires thorough testing to avoid regressions.
    if (isNullOrNullSplat(RHS)) {
      EVT NarrowVT = LHS.getValueType();
      EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
      EVT SetCCVT = getSetCCResultType(LHS.getValueType());
      unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
      unsigned WideWidth = WideVT.getScalarSizeInBits();
      bool IsSigned = isSignedIntSetCC(CC);
      auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
          SetCCWidth != 1 && SetCCWidth < WideWidth &&
          TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
          TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
        // Both compare operands can be widened for free. The LHS can use an
        // extended load, and the RHS is a constant:
        //   vselect (ext (setcc load(X), C)), N1, N2 -->
        //   vselect (setcc extload(X), C'), N1, N2
        auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
        SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
        EVT WideSetCCVT = getSetCCResultType(WideVT);
        SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
        return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
      }
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  return SDValue();
}
9568 
9569 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
9570   SDValue N0 = N->getOperand(0);
9571   SDValue N1 = N->getOperand(1);
9572   SDValue N2 = N->getOperand(2);
9573   SDValue N3 = N->getOperand(3);
9574   SDValue N4 = N->getOperand(4);
9575   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
9576 
9577   // fold select_cc lhs, rhs, x, x, cc -> x
9578   if (N2 == N3)
9579     return N2;
9580 
9581   // Determine if the condition we're dealing with is constant
9582   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
9583                                   CC, SDLoc(N), false)) {
9584     AddToWorklist(SCC.getNode());
9585 
9586     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
9587       if (!SCCC->isNullValue())
9588         return N2;    // cond always true -> true val
9589       else
9590         return N3;    // cond always false -> false val
9591     } else if (SCC->isUndef()) {
9592       // When the condition is UNDEF, just return the first operand. This is
9593       // coherent the DAG creation, no setcc node is created in this case
9594       return N2;
9595     } else if (SCC.getOpcode() == ISD::SETCC) {
9596       // Fold to a simpler select_cc
9597       SDValue SelectOp = DAG.getNode(
9598           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
9599           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
9600       SelectOp->setFlags(SCC->getFlags());
9601       return SelectOp;
9602     }
9603   }
9604 
9605   // If we can fold this based on the true/false value, do so.
9606   if (SimplifySelectOps(N, N2, N3))
9607     return SDValue(N, 0);  // Don't revisit N.
9608 
9609   // fold select_cc into other things, such as min/max/abs
9610   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
9611 }
9612 
9613 SDValue DAGCombiner::visitSETCC(SDNode *N) {
9614   // setcc is very commonly used as an argument to brcond. This pattern
9615   // also lend itself to numerous combines and, as a result, it is desired
9616   // we keep the argument to a brcond as a setcc as much as possible.
9617   bool PreferSetCC =
9618       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
9619 
9620   SDValue Combined = SimplifySetCC(
9621       N->getValueType(0), N->getOperand(0), N->getOperand(1),
9622       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
9623 
9624   if (!Combined)
9625     return SDValue();
9626 
9627   // If we prefer to have a setcc, and we don't, we'll try our best to
9628   // recreate one using rebuildSetCC.
9629   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
9630     SDValue NewSetCC = rebuildSetCC(Combined);
9631 
9632     // We don't have anything interesting to combine to.
9633     if (NewSetCC.getNode() == N)
9634       return SDValue();
9635 
9636     if (NewSetCC)
9637       return NewSetCC;
9638   }
9639 
9640   return Combined;
9641 }
9642 
9643 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
9644   SDValue LHS = N->getOperand(0);
9645   SDValue RHS = N->getOperand(1);
9646   SDValue Carry = N->getOperand(2);
9647   SDValue Cond = N->getOperand(3);
9648 
9649   // If Carry is false, fold to a regular SETCC.
9650   if (isNullConstant(Carry))
9651     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
9652 
9653   return SDValue();
9654 }
9655 
9656 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
9657 /// a build_vector of constants.
9658 /// This function is called by the DAGCombiner when visiting sext/zext/aext
9659 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
9660 /// Vector extends are not folded if operations are legal; this is to
9661 /// avoid introducing illegal build_vector dag nodes.
9662 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
9663                                          SelectionDAG &DAG, bool LegalTypes) {
9664   unsigned Opcode = N->getOpcode();
9665   SDValue N0 = N->getOperand(0);
9666   EVT VT = N->getValueType(0);
9667   SDLoc DL(N);
9668 
9669   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
9670          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
9671          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
9672          && "Expected EXTEND dag node in input!");
9673 
9674   // fold (sext c1) -> c1
9675   // fold (zext c1) -> c1
9676   // fold (aext c1) -> c1
9677   if (isa<ConstantSDNode>(N0))
9678     return DAG.getNode(Opcode, DL, VT, N0);
9679 
9680   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9681   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
9682   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9683   if (N0->getOpcode() == ISD::SELECT) {
9684     SDValue Op1 = N0->getOperand(1);
9685     SDValue Op2 = N0->getOperand(2);
9686     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
9687         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
9688       // For any_extend, choose sign extension of the constants to allow a
9689       // possible further transform to sign_extend_inreg.i.e.
9690       //
9691       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
9692       // t2: i64 = any_extend t1
9693       // -->
9694       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
9695       // -->
9696       // t4: i64 = sign_extend_inreg t3
9697       unsigned FoldOpc = Opcode;
9698       if (FoldOpc == ISD::ANY_EXTEND)
9699         FoldOpc = ISD::SIGN_EXTEND;
9700       return DAG.getSelect(DL, VT, N0->getOperand(0),
9701                            DAG.getNode(FoldOpc, DL, VT, Op1),
9702                            DAG.getNode(FoldOpc, DL, VT, Op2));
9703     }
9704   }
9705 
9706   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
9707   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
9708   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
9709   EVT SVT = VT.getScalarType();
9710   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
9711       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
9712     return SDValue();
9713 
9714   // We can fold this node into a build_vector.
9715   unsigned VTBits = SVT.getSizeInBits();
9716   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
9717   SmallVector<SDValue, 8> Elts;
9718   unsigned NumElts = VT.getVectorNumElements();
9719 
9720   // For zero-extensions, UNDEF elements still guarantee to have the upper
9721   // bits set to zero.
9722   bool IsZext =
9723       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
9724 
9725   for (unsigned i = 0; i != NumElts; ++i) {
9726     SDValue Op = N0.getOperand(i);
9727     if (Op.isUndef()) {
9728       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
9729       continue;
9730     }
9731 
9732     SDLoc DL(Op);
9733     // Get the constant value and if needed trunc it to the size of the type.
9734     // Nodes like build_vector might have constants wider than the scalar type.
9735     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
9736     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
9737       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
9738     else
9739       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
9740   }
9741 
9742   return DAG.getBuildVector(VT, DL, Elts);
9743 }
9744 
9745 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
9746 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
9747 // transformation. Returns true if extension are possible and the above
9748 // mentioned transformation is profitable.
9749 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
9750                                     unsigned ExtOpc,
9751                                     SmallVectorImpl<SDNode *> &ExtendNodes,
9752                                     const TargetLowering &TLI) {
9753   bool HasCopyToRegUses = false;
9754   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
9755   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
9756                             UE = N0.getNode()->use_end();
9757        UI != UE; ++UI) {
9758     SDNode *User = *UI;
9759     if (User == N)
9760       continue;
9761     if (UI.getUse().getResNo() != N0.getResNo())
9762       continue;
9763     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
9764     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
9765       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
9766       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
9767         // Sign bits will be lost after a zext.
9768         return false;
9769       bool Add = false;
9770       for (unsigned i = 0; i != 2; ++i) {
9771         SDValue UseOp = User->getOperand(i);
9772         if (UseOp == N0)
9773           continue;
9774         if (!isa<ConstantSDNode>(UseOp))
9775           return false;
9776         Add = true;
9777       }
9778       if (Add)
9779         ExtendNodes.push_back(User);
9780       continue;
9781     }
9782     // If truncates aren't free and there are users we can't
9783     // extend, it isn't worthwhile.
9784     if (!isTruncFree)
9785       return false;
9786     // Remember if this value is live-out.
9787     if (User->getOpcode() == ISD::CopyToReg)
9788       HasCopyToRegUses = true;
9789   }
9790 
9791   if (HasCopyToRegUses) {
9792     bool BothLiveOut = false;
9793     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
9794          UI != UE; ++UI) {
9795       SDUse &Use = UI.getUse();
9796       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
9797         BothLiveOut = true;
9798         break;
9799       }
9800     }
9801     if (BothLiveOut)
9802       // Both unextended and extended values are live out. There had better be
9803       // a good reason for the transformation.
9804       return ExtendNodes.size();
9805   }
9806   return true;
9807 }
9808 
9809 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
9810                                   SDValue OrigLoad, SDValue ExtLoad,
9811                                   ISD::NodeType ExtType) {
9812   // Extend SetCC uses if necessary.
9813   SDLoc DL(ExtLoad);
9814   for (SDNode *SetCC : SetCCs) {
9815     SmallVector<SDValue, 4> Ops;
9816 
9817     for (unsigned j = 0; j != 2; ++j) {
9818       SDValue SOp = SetCC->getOperand(j);
9819       if (SOp == OrigLoad)
9820         Ops.push_back(ExtLoad);
9821       else
9822         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
9823     }
9824 
9825     Ops.push_back(SetCC->getOperand(2));
9826     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
9827   }
9828 }
9829 
9830 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Only plain (non-extending, non-indexed, non-atomic/volatile) single-use
  // loads feeding a pow-2 vector extend are candidates; the target must also
  // opt in via isVectorLoadExtDesirable.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || !LN0->isSimple() ||
      !DstVT.isVector() || !DstVT.isPow2VectorType() ||
      !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // Collect setcc users that can be rewritten alongside the load; bail out
  // if extending the other uses is not possible/profitable.
  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  // Give up if halving never reached a legal/custom extload combination.
  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  // Byte distance between consecutive pieces of the original memory.
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  // Emit one extending load per piece, advancing the base pointer by Stride
  // and deriving each piece's alignment from the original load's alignment.
  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    const Align Align = commonAlignment(LN0->getAlign(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  // Merge the chains of the split loads and concatenate their values.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
9928 
9929 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9930 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
  EVT VT = N->getValueType(0);
  EVT OrigVT = N->getOperand(0).getValueType();
  // If the target can zero-extend for free there is nothing to gain here.
  if (TLI.isZExtFree(OrigVT, VT))
    return SDValue();

  // and/or/xor
  SDValue N0 = N->getOperand(0);
  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
        N0.getOpcode() == ISD::XOR) ||
      N0.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    return SDValue();

  // shl/shr
  SDValue N1 = N0->getOperand(0);
  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
      N1.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    return SDValue();

  // load
  if (!isa<LoadSDNode>(N1.getOperand(0)))
    return SDValue();
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
  EVT MemVT = Load->getMemoryVT();
  // The load must be convertible to a zextload: the target supports it and
  // the load isn't already sign-extending or indexed.
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();


  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong.
  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    return SDValue();

  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  // Check that any other users of the load can also be extended.
  SmallVector<SDNode*, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
                               ISD::ZERO_EXTEND, SetCCs, TLI))
    return SDValue();

  // Actually do the transformation.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
                                   Load->getChain(), Load->getBasePtr(),
                                   Load->getMemoryVT(), Load->getMemOperand());

  // Rebuild the shift in the wide type using the original shift amount.
  SDLoc DL1(N1);
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
                              N1.getOperand(1));

  // Rebuild the logic op with the logic constant zero-extended to VT.
  APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
  SDLoc DL0(N0);
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
                            DAG.getConstant(Mask, DL0, VT));

  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
  CombineTo(N, And);
  if (SDValue(Load, 0).hasOneUse()) {
    // Only the chain result of the old load remains live; reroute it.
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
  } else {
    // Other users of the load value still exist; give them a truncate of
    // the new extended load instead.
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
                                Load->getValueType(0), ExtLoad);
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
  }

  // N0 is dead at this point.
  recursivelyDeleteUnusedNodes(N0.getNode());

  return SDValue(N,0); // Return N so it doesn't get rechecked!
}
10005 
10006 /// If we're narrowing or widening the result of a vector select and the final
10007 /// size is the same size as a setcc (compare) feeding the select, then try to
10008 /// apply the cast operation to the select's operands because matching vector
10009 /// sizes for a select condition and other operands should be more efficient.
10010 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
10011   unsigned CastOpcode = Cast->getOpcode();
10012   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
10013           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
10014           CastOpcode == ISD::FP_ROUND) &&
10015          "Unexpected opcode for vector select narrowing/widening");
10016 
10017   // We only do this transform before legal ops because the pattern may be
10018   // obfuscated by target-specific operations after legalization. Do not create
10019   // an illegal select op, however, because that may be difficult to lower.
10020   EVT VT = Cast->getValueType(0);
10021   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
10022     return SDValue();
10023 
10024   SDValue VSel = Cast->getOperand(0);
10025   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
10026       VSel.getOperand(0).getOpcode() != ISD::SETCC)
10027     return SDValue();
10028 
10029   // Does the setcc have the same vector size as the casted select?
10030   SDValue SetCC = VSel.getOperand(0);
10031   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
10032   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
10033     return SDValue();
10034 
10035   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
10036   SDValue A = VSel.getOperand(1);
10037   SDValue B = VSel.getOperand(2);
10038   SDValue CastA, CastB;
10039   SDLoc DL(Cast);
10040   if (CastOpcode == ISD::FP_ROUND) {
10041     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
10042     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
10043     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
10044   } else {
10045     CastA = DAG.getNode(CastOpcode, DL, VT, A);
10046     CastB = DAG.getNode(CastOpcode, DL, VT, B);
10047   }
10048   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
10049 }
10050 
10051 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10052 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
                                     const TargetLowering &TLI, EVT VT,
                                     bool LegalOperations, SDNode *N,
                                     SDValue N0, ISD::LoadExtType ExtLoadType) {
  SDNode *N0Node = N0.getNode();
  // The existing load must already extend in a compatible way: either the
  // same flavor as the requested extension, or a plain any-extend load that
  // can be strengthened to it.
  bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
                                                   : ISD::isZEXTLoad(N0Node);
  if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
      !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  EVT MemVT = LN0->getMemoryVT();
  // The wider extload must be legal unless we are pre-legalization with a
  // simple scalar load, in which case it can be formed speculatively.
  if ((LegalOperations || !LN0->isSimple() ||
       VT.isVector()) &&
      !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
    return SDValue();

  // Build the wider extending load, then replace the extend and the old
  // load's chain with it.
  SDValue ExtLoad =
      DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                     LN0->getBasePtr(), MemVT, LN0->getMemOperand());
  Combiner.CombineTo(N, ExtLoad);
  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
  if (LN0->use_empty())
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
10080 
10081 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10082 // Only generate vector extloads when 1) they're legal, and 2) they are
10083 // deemed desirable by the target.
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
                                  const TargetLowering &TLI, EVT VT,
                                  bool LegalOperations, SDNode *N, SDValue N0,
                                  ISD::LoadExtType ExtLoadType,
                                  ISD::NodeType ExtOpc) {
  // Only plain, unindexed loads qualify; the extload must be legal unless we
  // are pre-legalization with a simple scalar load.
  if (!ISD::isNON_EXTLoad(N0.getNode()) ||
      !ISD::isUNINDEXEDLoad(N0.getNode()) ||
      ((LegalOperations || VT.isVector() ||
        !cast<LoadSDNode>(N0)->isSimple()) &&
       !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
    return {};

  bool DoXform = true;
  SmallVector<SDNode *, 4> SetCCs;
  // If the load has other users, only transform when those users (setcc or
  // trunc-tolerant uses) can be extended too.
  if (!N0.hasOneUse())
    DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
  // Vector extloads additionally require the target's blessing.
  if (VT.isVector())
    DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
  if (!DoXform)
    return {};

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                                   LN0->getBasePtr(), N0.getValueType(),
                                   LN0->getMemOperand());
  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
  // If the load value is used only by N, replace it via CombineTo N.
  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
  Combiner.CombineTo(N, ExtLoad);
  if (NoReplaceTrunc) {
    // Only the chain of the old load remains live; reroute it and clean up.
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  } else {
    // Other users of the narrow value get a truncate of the extended load.
    SDValue Trunc =
        DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
    Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
10123 
10124 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
10125                                         const TargetLowering &TLI, EVT VT,
10126                                         SDNode *N, SDValue N0,
10127                                         ISD::LoadExtType ExtLoadType,
10128                                         ISD::NodeType ExtOpc) {
10129   if (!N0.hasOneUse())
10130     return SDValue();
10131 
10132   MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
10133   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
10134     return SDValue();
10135 
10136   if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
10137     return SDValue();
10138 
10139   if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10140     return SDValue();
10141 
10142   SDLoc dl(Ld);
10143   SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
10144   SDValue NewLoad = DAG.getMaskedLoad(
10145       VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
10146       PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
10147       ExtLoadType, Ld->isExpandingLoad());
10148   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
10149   return NewLoad;
10150 }
10151 
10152 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
10153                                        bool LegalOperations) {
10154   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10155           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
10156 
10157   SDValue SetCC = N->getOperand(0);
10158   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
10159       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
10160     return SDValue();
10161 
10162   SDValue X = SetCC.getOperand(0);
10163   SDValue Ones = SetCC.getOperand(1);
10164   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
10165   EVT VT = N->getValueType(0);
10166   EVT XVT = X.getValueType();
10167   // setge X, C is canonicalized to setgt, so we do not need to match that
10168   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
10169   // not require the 'not' op.
10170   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
10171     // Invert and smear/shift the sign bit:
10172     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
10173     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
10174     SDLoc DL(N);
10175     unsigned ShCt = VT.getSizeInBits() - 1;
10176     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10177     if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
10178       SDValue NotX = DAG.getNOT(DL, X, VT);
10179       SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
10180       auto ShiftOpcode =
10181         N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
10182       return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
10183     }
10184   }
10185   return SDValue();
10186 }
10187 
10188 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
10189   SDValue N0 = N->getOperand(0);
10190   EVT VT = N->getValueType(0);
10191   SDLoc DL(N);
10192 
10193   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10194     return Res;
10195 
10196   // fold (sext (sext x)) -> (sext x)
10197   // fold (sext (aext x)) -> (sext x)
10198   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
10199     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
10200 
10201   if (N0.getOpcode() == ISD::TRUNCATE) {
10202     // fold (sext (truncate (load x))) -> (sext (smaller load x))
10203     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
10204     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10205       SDNode *oye = N0.getOperand(0).getNode();
10206       if (NarrowLoad.getNode() != N0.getNode()) {
10207         CombineTo(N0.getNode(), NarrowLoad);
10208         // CombineTo deleted the truncate, if needed, but not what's under it.
10209         AddToWorklist(oye);
10210       }
10211       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10212     }
10213 
10214     // See if the value being truncated is already sign extended.  If so, just
10215     // eliminate the trunc/sext pair.
10216     SDValue Op = N0.getOperand(0);
10217     unsigned OpBits   = Op.getScalarValueSizeInBits();
10218     unsigned MidBits  = N0.getScalarValueSizeInBits();
10219     unsigned DestBits = VT.getScalarSizeInBits();
10220     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
10221 
10222     if (OpBits == DestBits) {
10223       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
10224       // bits, it is already ready.
10225       if (NumSignBits > DestBits-MidBits)
10226         return Op;
10227     } else if (OpBits < DestBits) {
10228       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
10229       // bits, just sext from i32.
10230       if (NumSignBits > OpBits-MidBits)
10231         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
10232     } else {
10233       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
10234       // bits, just truncate to i32.
10235       if (NumSignBits > OpBits-MidBits)
10236         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
10237     }
10238 
10239     // fold (sext (truncate x)) -> (sextinreg x).
10240     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
10241                                                  N0.getValueType())) {
10242       if (OpBits < DestBits)
10243         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
10244       else if (OpBits > DestBits)
10245         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
10246       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
10247                          DAG.getValueType(N0.getValueType()));
10248     }
10249   }
10250 
10251   // Try to simplify (sext (load x)).
10252   if (SDValue foldedExt =
10253           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
10254                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
10255     return foldedExt;
10256 
10257   if (SDValue foldedExt =
10258       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
10259                                ISD::SIGN_EXTEND))
10260     return foldedExt;
10261 
10262   // fold (sext (load x)) to multiple smaller sextloads.
10263   // Only on illegal but splittable vectors.
10264   if (SDValue ExtLoad = CombineExtLoad(N))
10265     return ExtLoad;
10266 
10267   // Try to simplify (sext (sextload x)).
10268   if (SDValue foldedExt = tryToFoldExtOfExtload(
10269           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
10270     return foldedExt;
10271 
10272   // fold (sext (and/or/xor (load x), cst)) ->
10273   //      (and/or/xor (sextload x), (sext cst))
10274   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10275        N0.getOpcode() == ISD::XOR) &&
10276       isa<LoadSDNode>(N0.getOperand(0)) &&
10277       N0.getOperand(1).getOpcode() == ISD::Constant &&
10278       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
10279     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
10280     EVT MemVT = LN00->getMemoryVT();
10281     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
10282       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
10283       SmallVector<SDNode*, 4> SetCCs;
10284       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
10285                                              ISD::SIGN_EXTEND, SetCCs, TLI);
10286       if (DoXform) {
10287         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
10288                                          LN00->getChain(), LN00->getBasePtr(),
10289                                          LN00->getMemoryVT(),
10290                                          LN00->getMemOperand());
10291         APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
10292         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
10293                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
10294         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
10295         bool NoReplaceTruncAnd = !N0.hasOneUse();
10296         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
10297         CombineTo(N, And);
10298         // If N0 has multiple uses, change other uses as well.
10299         if (NoReplaceTruncAnd) {
10300           SDValue TruncAnd =
10301               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
10302           CombineTo(N0.getNode(), TruncAnd);
10303         }
10304         if (NoReplaceTrunc) {
10305           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
10306         } else {
10307           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
10308                                       LN00->getValueType(0), ExtLoad);
10309           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
10310         }
10311         return SDValue(N,0); // Return N so it doesn't get rechecked!
10312       }
10313     }
10314   }
10315 
10316   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
10317     return V;
10318 
10319   if (N0.getOpcode() == ISD::SETCC) {
10320     SDValue N00 = N0.getOperand(0);
10321     SDValue N01 = N0.getOperand(1);
10322     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10323     EVT N00VT = N00.getValueType();
10324 
10325     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
10326     // Only do this before legalize for now.
10327     if (VT.isVector() && !LegalOperations &&
10328         TLI.getBooleanContents(N00VT) ==
10329             TargetLowering::ZeroOrNegativeOneBooleanContent) {
10330       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
10331       // of the same size as the compared operands. Only optimize sext(setcc())
10332       // if this is the case.
10333       EVT SVT = getSetCCResultType(N00VT);
10334 
10335       // If we already have the desired type, don't change it.
10336       if (SVT != N0.getValueType()) {
10337         // We know that the # elements of the results is the same as the
10338         // # elements of the compare (and the # elements of the compare result
10339         // for that matter).  Check to see that they are the same size.  If so,
10340         // we know that the element size of the sext'd result matches the
10341         // element size of the compare operands.
10342         if (VT.getSizeInBits() == SVT.getSizeInBits())
10343           return DAG.getSetCC(DL, VT, N00, N01, CC);
10344 
10345         // If the desired elements are smaller or larger than the source
10346         // elements, we can use a matching integer vector type and then
10347         // truncate/sign extend.
10348         EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
10349         if (SVT == MatchingVecType) {
10350           SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
10351           return DAG.getSExtOrTrunc(VsetCC, DL, VT);
10352         }
10353       }
10354     }
10355 
10356     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
10357     // Here, T can be 1 or -1, depending on the type of the setcc and
10358     // getBooleanContents().
10359     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
10360 
10361     // To determine the "true" side of the select, we need to know the high bit
10362     // of the value returned by the setcc if it evaluates to true.
10363     // If the type of the setcc is i1, then the true case of the select is just
10364     // sext(i1 1), that is, -1.
10365     // If the type of the setcc is larger (say, i8) then the value of the high
10366     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
10367     // of the appropriate width.
10368     SDValue ExtTrueVal = (SetCCWidth == 1)
10369                              ? DAG.getAllOnesConstant(DL, VT)
10370                              : DAG.getBoolConstant(true, DL, VT, N00VT);
10371     SDValue Zero = DAG.getConstant(0, DL, VT);
10372     if (SDValue SCC =
10373             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
10374       return SCC;
10375 
10376     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
10377       EVT SetCCVT = getSetCCResultType(N00VT);
10378       // Don't do this transform for i1 because there's a select transform
10379       // that would reverse it.
10380       // TODO: We should not do this transform at all without a target hook
10381       // because a sext is likely cheaper than a select?
10382       if (SetCCVT.getScalarSizeInBits() != 1 &&
10383           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
10384         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
10385         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
10386       }
10387     }
10388   }
10389 
10390   // fold (sext x) -> (zext x) if the sign bit is known zero.
10391   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
10392       DAG.SignBitIsZero(N0))
10393     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
10394 
10395   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10396     return NewVSel;
10397 
10398   // Eliminate this sign extend by doing a negation in the destination type:
10399   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
10400   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
10401       isNullOrNullSplat(N0.getOperand(0)) &&
10402       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
10403       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
10404     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
10405     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
10406   }
10407   // Eliminate this sign extend by doing a decrement in the destination type:
10408   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
10409   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
10410       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
10411       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
10412       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
10413     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
10414     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
10415   }
10416 
10417   return SDValue();
10418 }
10419 
10420 // isTruncateOf - If N is a truncate of some other value, return true, record
10421 // the value being truncated in Op and which of Op's bits are zero/one in Known.
10422 // This function computes KnownBits to avoid a duplicated call to
10423 // computeKnownBits in the caller.
10424 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
10425                          KnownBits &Known) {
10426   if (N->getOpcode() == ISD::TRUNCATE) {
10427     Op = N->getOperand(0);
10428     Known = DAG.computeKnownBits(Op);
10429     return true;
10430   }
10431 
10432   if (N.getOpcode() != ISD::SETCC ||
10433       N.getValueType().getScalarType() != MVT::i1 ||
10434       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
10435     return false;
10436 
10437   SDValue Op0 = N->getOperand(0);
10438   SDValue Op1 = N->getOperand(1);
10439   assert(Op0.getValueType() == Op1.getValueType());
10440 
10441   if (isNullOrNullSplat(Op0))
10442     Op = Op1;
10443   else if (isNullOrNullSplat(Op1))
10444     Op = Op0;
10445   else
10446     return false;
10447 
10448   Known = DAG.computeKnownBits(Op);
10449 
10450   return (Known.Zero | 1).isAllOnesValue();
10451 }
10452 
10453 /// Given an extending node with a pop-count operand, if the target does not
10454 /// support a pop-count in the narrow source type but does support it in the
10455 /// destination type, widen the pop-count to the destination type.
10456 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
10457   assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
10458           Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
10459 
10460   SDValue CtPop = Extend->getOperand(0);
10461   if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
10462     return SDValue();
10463 
10464   EVT VT = Extend->getValueType(0);
10465   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10466   if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
10467       !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
10468     return SDValue();
10469 
10470   // zext (ctpop X) --> ctpop (zext X)
10471   SDLoc DL(Extend);
10472   SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
10473   return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
10474 }
10475 
/// Combine a ZERO_EXTEND node with its operand. Each fold below either
/// returns a replacement value or falls through to the next one; returning
/// SDValue(N, 0) signals that N was rewritten in place via CombineTo.
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant (or build_vector-of-constants) operands fold immediately.
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // isTruncateOf also recognizes (setne x, 0) on a 0/1 value as a truncate
  // and fills in Known for x so we do not recompute it here.
  SDValue Op;
  KnownBits Known;
  if (isTruncateOf(DAG, N0, Op, Known)) {
    // TruncatedBits are the bits of Op that lie between the narrow width
    // (N0's scalar size) and the result width (capped at Op's own width).
    // If the widths match there is nothing truncated, so the mask is zero.
    APInt TruncatedBits =
      (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
      APInt(Op.getScalarValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getScalarValueSizeInBits(),
                        N0.getScalarValueSizeInBits(),
                        std::min(Op.getScalarValueSizeInBits(),
                                 VT.getScalarSizeInBits()));
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger mask,
    // possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
        AddToWorklist(Op.getNode());
        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
        // Transfer the debug info; the new node is equivalent to N0.
        DAG.transferDbgValues(N0, ZExtOrTrunc);
        return ZExtOrTrunc;
      }
    }

    // Otherwise widen first, then mask off the bits above MinVT's width.
    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
      // We may safely transfer the debug info describing the truncate node over
      // to the equivalent and operation.
      DAG.transferDbgValues(N0, And);
      return And;
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    // Zero-extend the constant mask so the AND in the wide type still clears
    // everything the narrow AND cleared.
    APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // Try to simplify (zext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
    return foldedExt;

  if (SDValue foldedExt =
      tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
                               ISD::ZERO_EXTEND))
    return foldedExt;

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        if (N0.getOpcode() == ISD::AND) {
          // If the AND already matches as a zextload on its own, leave it
          // alone: rewriting would duplicate the load for the other users.
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT;
          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
            DoXform = false;
        }
      }
      if (DoXform)
        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
        // Capture use counts before CombineTo mutates the graph.
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          // Load value only fed N0; just rewire the chain output.
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          // Other users of the load still need the narrow value; give them a
          // truncate of the new wide extload.
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    return ZExtLoad;

  // Try to simplify (zext (zextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
    return foldedExt;

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      // If the setcc already produces the target's preferred result type,
      // leave it for other combines.
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the sext'd result matches the element size of
      // the compare operands.
      SDLoc DL(N);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend followed by zext_in_reg.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
                                    N0.getValueType());
    }

    // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
    SDLoc DL(N);
    EVT N0VT = N0.getValueType();
    EVT N00VT = N0.getOperand(0).getValueType();
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1),
            DAG.getBoolConstant(true, DL, N0VT, N00VT),
            DAG.getBoolConstant(false, DL, N0VT, N00VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      // KnownZeroBits = bits guaranteed zero at the top of the inner zext;
      // the shift amount must not exceed that headroom.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // zext (ctpop X) --> ctpop (zext X), if profitable for the target.
  if (SDValue NewCtPop = widenCtPop(N, DAG))
    return NewCtPop;

  return SDValue();
}
10731 
/// Combine an ANY_EXTEND node with its operand. Since the high bits of an
/// any-extend are undefined, several folds here are free to substitute a
/// zero- or sign-extending form when that is what the target supports.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant operands fold immediately.
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  // Note the inner extend's opcode is kept, so zext/sext semantics survive.
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  // The high bits are undefined anyway, so just extend or truncate x itself.
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    // Zero-extend the mask so the wide AND clears the same bits.
    APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction, so attempt to fold to zext instead.
  if (VT.isVector()) {
    // Try to simplify (zext (load x)).
    if (SDValue foldedExt =
            tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                               ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
      return foldedExt;
  } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
             ISD::isUNINDEXEDLoad(N0.getNode()) &&
             TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode *, 4> SetCCs;
    // With multiple users, only transform if all of them can use the
    // extended value (setcc users are rewritten by ExtendSetCCUses below).
    if (!N0.hasOneUse())
      DoXform =
          ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       N0.getValueType(), LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        // Only the chain output still has users; rewire it to the extload.
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
        recursivelyDeleteUnusedNodes(LN0);
      } else {
        // Other users need the narrow value: hand them a truncate of the
        // new extending load.
        SDValue Trunc =
            DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      // Re-issue the existing extending load directly at the wide type,
      // preserving its original extension kind.
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      recursivelyDeleteUnusedNodes(LN0);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      // If the setcc already produces the preferred result type, leave it.
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
        DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1),
                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // aext (ctpop X) --> ctpop (zext X), if profitable for the target.
  if (SDValue NewCtPop = widenCtPop(N, DAG))
    return NewCtPop;

  return SDValue();
}
10883 
10884 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
10885   unsigned Opcode = N->getOpcode();
10886   SDValue N0 = N->getOperand(0);
10887   SDValue N1 = N->getOperand(1);
10888   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
10889 
10890   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
10891   if (N0.getOpcode() == Opcode &&
10892       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
10893     return N0;
10894 
10895   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10896       N0.getOperand(0).getOpcode() == Opcode) {
10897     // We have an assert, truncate, assert sandwich. Make one stronger assert
10898     // by asserting on the smallest asserted type to the larger source type.
10899     // This eliminates the later assert:
10900     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
10901     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
10902     SDValue BigA = N0.getOperand(0);
10903     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10904     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10905            "Asserting zero/sign-extended bits to a type larger than the "
10906            "truncated destination does not provide information");
10907 
10908     SDLoc DL(N);
10909     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
10910     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
10911     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10912                                     BigA.getOperand(0), MinAssertVTVal);
10913     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10914   }
10915 
10916   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
10917   // than X. Just move the AssertZext in front of the truncate and drop the
10918   // AssertSExt.
10919   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10920       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
10921       Opcode == ISD::AssertZext) {
10922     SDValue BigA = N0.getOperand(0);
10923     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10924     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10925            "Asserting zero/sign-extended bits to a type larger than the "
10926            "truncated destination does not provide information");
10927 
10928     if (AssertVT.bitsLT(BigA_AssertVT)) {
10929       SDLoc DL(N);
10930       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10931                                       BigA.getOperand(0), N1);
10932       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10933     }
10934   }
10935 
10936   return SDValue();
10937 }
10938 
10939 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
10940   SDLoc DL(N);
10941 
10942   Align AL = cast<AssertAlignSDNode>(N)->getAlign();
10943   SDValue N0 = N->getOperand(0);
10944 
10945   // Fold (assertalign (assertalign x, AL0), AL1) ->
10946   // (assertalign x, max(AL0, AL1))
10947   if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
10948     return DAG.getAssertAlign(DL, N0.getOperand(0),
10949                               std::max(AL, AAN->getAlign()));
10950 
10951   // In rare cases, there are trivial arithmetic ops in source operands. Sink
10952   // this assert down to source operands so that those arithmetic ops could be
10953   // exposed to the DAG combining.
10954   switch (N0.getOpcode()) {
10955   default:
10956     break;
10957   case ISD::ADD:
10958   case ISD::SUB: {
10959     unsigned AlignShift = Log2(AL);
10960     SDValue LHS = N0.getOperand(0);
10961     SDValue RHS = N0.getOperand(1);
10962     unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
10963     unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
10964     if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
10965       if (LHSAlignShift < AlignShift)
10966         LHS = DAG.getAssertAlign(DL, LHS, AL);
10967       if (RHSAlignShift < AlignShift)
10968         RHS = DAG.getAssertAlign(DL, RHS, AL);
10969       return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
10970     }
10971     break;
10972   }
10973   }
10974 
10975   return SDValue();
10976 }
10977 
/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, where N is a multiple of the number of bits
/// of the narrower type, transform it to a narrower load from address + N /
/// num of bits of new type. Also narrow the load if the result is masked with
/// an AND to effectively produce a smaller type. If the result is to be
/// extended, also fold the extension to form an extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  // ExtType is how the narrowed load will re-extend the value; ExtVT is the
  // narrow type that will actually be loaded from memory.
  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // ShAmt is the bit offset, within the wide loaded value, of the narrow
  // value being extracted. HasShiftedOffset records that the offset came from
  // a shifted AND mask, so the result must be shifted back left at the end.
  unsigned ShAmt = 0;
  bool HasShiftedOffset = false;
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value,
    // or it maybe shifting a higher subword, half or byte into the lowest
    // bits.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);

    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01 || !LN0)
      return SDValue();

    uint64_t ShiftAmt = N01->getZExtValue();
    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
    // For a non-sext load the bits above MemoryWidth are known zero, so the
    // value being shifted down is only MemoryWidth - ShiftAmt bits wide.
    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    else
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getSizeInBits() - ShiftAmt);
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC)
      return SDValue();

    const APInt &Mask = AndC->getAPIntValue();
    unsigned ActiveBits = 0;
    if (Mask.isMask()) {
      ActiveBits = Mask.countTrailingOnes();
    } else if (Mask.isShiftedMask()) {
      // A shifted mask selects a bitfield at bit offset ShAmt; remember the
      // offset so the loaded value can be shifted back into position below.
      ShAmt = Mask.countTrailingZeros();
      APInt ShiftedMask = Mask.lshr(ShAmt);
      ActiveBits = ShiftedMask.countTrailingOnes();
      HasShiftedOffset = true;
    } else
      return SDValue();

    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  // If the value being narrowed is itself an SRL of a (one-use) load, fold
  // the shift amount into the load offset instead of emitting a shift.
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    SDValue SRL = N0;
    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
      ShAmt = ConstShift->getZExtValue();
      unsigned EVTBits = ExtVT.getSizeInBits();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      auto *LN0 = dyn_cast<LoadSDNode>(N0);
      if (!LN0) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (LN0->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
        return SDValue();

      // If the SRL is only used by a masking AND, we may be able to adjust
      // the ExtVT to make the AND redundant.
      SDNode *Mask = *(SRL->use_begin());
      if (Mask->getOpcode() == ISD::AND &&
          isa<ConstantSDNode>(Mask->getOperand(1))) {
        const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
        if (ShiftMask.isMask()) {
          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
                                           ShiftMask.countTrailingOnes());
          // If the mask is smaller, recompute the type.
          if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
              TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
            ExtVT = MaskedVT;
        }
      }
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  // Reducing the width of a volatile load is illegal.  For atomics, we may be
  // able to reduce the width provided we never widen again. (see D66309)
  if (!LN0->isSimple() ||
      !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  // On big-endian targets the narrow value sits at the opposite end of the
  // in-memory wide value, so mirror the bit offset within the stored bytes.
  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    return LVTStoreBits - EVTStoreBits - ShAmt;
  };

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian())
    ShAmt = AdjustBigEndianShift(ShAmt);

  uint64_t PtrOff = ShAmt / 8;
  Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
                                            TypeSize::Fixed(PtrOff), DL, Flags);
  AddToWorklist(NewPtr.getNode());

  // Build the narrower load: a plain load when no re-extension is needed,
  // otherwise an extending load of the narrow memory type ExtVT.
  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  if (HasShiftedOffset) {
    // Recalculate the shift amount after it has been altered to calculate
    // the offset.
    if (DAG.getDataLayout().isBigEndian())
      ShAmt = AdjustBigEndianShift(ShAmt);

    // We're using a shifted mask, so the load now has an offset. This means
    // that data has been loaded into the lower bytes than it would have been
    // before, so we need to shl the loaded data into the correct position in the
    // register.
    SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
    Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
  }

  // Return the new loaded value.
  return Result;
}
11184 
/// Combine a SIGN_EXTEND_INREG node: try each fold in order and return the
/// replacement value, or an empty SDValue if nothing applies.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // ExtVT is the narrow type whose sign bit is replicated into the high bits.
  EVT ExtVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned ExtVTBits = ExtVT.getScalarSizeInBits();

  // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (sext_in_reg c1) -> c1
  // (getNode constant-folds when the operand is a constant.)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
                       N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough or if we know that x has more than 1 sign bit and the
  // sign_extend_inreg is extending from one of them.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    unsigned N00Bits = N00.getScalarValueSizeInBits();
    if ((N00Bits <= ExtVTBits ||
         (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
  }

  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
      N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) {
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
      return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
                         N0.getOperand(0));
  }

  // fold (sext_in_reg (zext x)) -> (sext x)
  // iff we are extending the source sign bit.
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() == ExtVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      // NOTE(review): SIGN_EXTEND is a unary node, yet N1 is passed as a
      // second operand here -- looks vestigial; confirm it is intentional.
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  // If sextload is not supported by target, we can only do the combine when
  // load has one use. Doing otherwise can block folding the extload with other
  // extends that the target does support.
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
        N0.hasOneUse()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), ExtVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Also redirect users of the old load (value and chain) to the sextload.
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), ExtVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
  // ignore it if the masked load is already sign extended
  if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
    if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
        Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
      SDValue ExtMaskedLoad = DAG.getMaskedLoad(
          VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
          Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
          Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
      CombineTo(N, ExtMaskedLoad);
      CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
  }

  return SDValue();
}
11334 
11335 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
11336   SDValue N0 = N->getOperand(0);
11337   EVT VT = N->getValueType(0);
11338 
11339   // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
11340   if (N0.isUndef())
11341     return DAG.getConstant(0, SDLoc(N), VT);
11342 
11343   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11344     return Res;
11345 
11346   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11347     return SDValue(N, 0);
11348 
11349   return SDValue();
11350 }
11351 
11352 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
11353   SDValue N0 = N->getOperand(0);
11354   EVT VT = N->getValueType(0);
11355 
11356   // zext_vector_inreg(undef) = 0 because the top bits will be zero.
11357   if (N0.isUndef())
11358     return DAG.getConstant(0, SDLoc(N), VT);
11359 
11360   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11361     return Res;
11362 
11363   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11364     return SDValue(N, 0);
11365 
11366   return SDValue();
11367 }
11368 
/// Combine a TRUNCATE node: try each fold in order and return the replacement
/// value, or an empty SDValue if nothing applies.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (SrcVT == VT)
    return N0;

  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));

  // fold (truncate c1) -> c1
  // (getNode constant-folds; only return if it produced a different node.)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
    if (C.getNode() != N)
      return C;
  }

  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, then we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    auto EltCnt = VecTy.getVectorElementCount();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
    auto NewEltCnt = EltCnt * SizeRatio;

    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      // Big-endian: the desired low bits are in the last sub-element.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getVectorIdxConstant(Index, DL));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known = DAG.computeKnownBits(Amt);
    unsigned Size = VT.getScalarSizeInBits();
    // Only safe when the shift amount is provably < the narrow bit width.
    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
      SDLoc SL(N);
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
        AddToWorklist(Amt.getNode());
      }
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    }
  }

  // Attempt to pre-truncate BUILD_VECTOR sources.
  if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
      // Avoid creating illegal types if running after type legalizer.
      (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
    SDLoc DL(N);
    EVT SVT = VT.getScalarType();
    SmallVector<SDValue, 8> TruncOps;
    for (const SDValue &Op : N0->op_values()) {
      SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
      TruncOps.push_back(TruncOp);
    }
    return DAG.getBuildVector(VT, DL, TruncOps);
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {
    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset'th operand -- those hold the low bits of
      // each wide element on this stride.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    APInt Mask =
        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (LN0->isSimple() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    // Scan operands: remember the single non-undef one (if any) and build the
    // narrowed subvector type for each operand as we go.
    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;

      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorElementCount()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT VecSrcVT = VecSrc.getValueType();
    if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
      SDLoc SL(N);

      // The low bits live in element 0 on little-endian, last element on
      // big-endian.
      unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
                         DAG.getVectorIdxConstant(Idx, SL));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
  // When the adde's carry is not used.
  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      // We only do for addcarry before legalize operation
      ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
       TLI.isOperationLegal(N0.getOpcode(), VT))) {
    SDLoc SL(N);
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
  }

  // fold (truncate (extract_subvector(ext x))) ->
  //      (extract_subvector x)
  // TODO: This can be generalized to cover cases where the truncate and extract
  // do not fully cancel each other out.
  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::SIGN_EXTEND ||
        N00.getOpcode() == ISD::ZERO_EXTEND ||
        N00.getOpcode() == ISD::ANY_EXTEND) {
      if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
          VT.getVectorElementType())
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
                           N00.getOperand(0), N0.getOperand(1));
    }
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Narrow a suitable binary operation with a non-opaque constant operand by
  // moving it ahead of the truncate. This is limited to pre-legalization
  // because targets may prefer a wider type during later combines and invert
  // this transform.
  switch (N0.getOpcode()) {
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    if (!LegalOperations && N0.hasOneUse() &&
        (isConstantOrConstantVector(N0.getOperand(0), true) ||
         isConstantOrConstantVector(N0.getOperand(1), true))) {
      // TODO: We already restricted this to pre-legalization, but for vectors
      // we are extra cautious to not create an unsupported operation.
      // Target-specific changes are likely needed to avoid regressions here.
      if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
        SDLoc DL(N);
        SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
        SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
        return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
      }
    }
  }

  return SDValue();
}
11683 
11684 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
11685   SDValue Elt = N->getOperand(i);
11686   if (Elt.getOpcode() != ISD::MERGE_VALUES)
11687     return Elt.getNode();
11688   return Elt.getOperand(Elt.getResNo()).getNode();
11689 }
11690 
/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  // Each half of the pair must be fed by a load (possibly wrapped in
  // MERGE_VALUES, which getBuildPairElt looks through).
  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

  // A BUILD_PAIR is always having the least significant part in elt 0 and the
  // most significant part in elt 1. So when combining into one large load, we
  // need to consider the endianness.
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LD1, LD2);

  // After the possible swap, LD1 is the half expected at the lower address.
  // It must be a plain (non-extending) load with a single user so the wide
  // load can take its place, and both halves must share an address space.
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getStoreSize();
  // LD2 must also be a simple single-use load that sits exactly LD1Bytes
  // after LD1 in memory (i.e. the two loads are consecutive).
  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    Align Alignment = LD1->getAlign();
    Align NewAlign = DAG.getDataLayout().getABITypeAlign(
        VT.getTypeForEVT(*DAG.getContext()));

    // Only combine when the wide type's ABI alignment does not exceed what
    // the original load already guarantees, and (after legalization) the
    // wide load is a legal operation.
    if (NewAlign <= Alignment &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                         LD1->getPointerInfo(), Alignment);
  }

  return SDValue();
}
11724 
11725 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
11726   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
11727   // and Lo parts; on big-endian machines it doesn't.
11728   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
11729 }
11730 
11731 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
11732                                     const TargetLowering &TLI) {
11733   // If this is not a bitcast to an FP type or if the target doesn't have
11734   // IEEE754-compliant FP logic, we're done.
11735   EVT VT = N->getValueType(0);
11736   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
11737     return SDValue();
11738 
11739   // TODO: Handle cases where the integer constant is a different scalar
11740   // bitwidth to the FP.
11741   SDValue N0 = N->getOperand(0);
11742   EVT SourceVT = N0.getValueType();
11743   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
11744     return SDValue();
11745 
11746   unsigned FPOpcode;
11747   APInt SignMask;
11748   switch (N0.getOpcode()) {
11749   case ISD::AND:
11750     FPOpcode = ISD::FABS;
11751     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
11752     break;
11753   case ISD::XOR:
11754     FPOpcode = ISD::FNEG;
11755     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
11756     break;
11757   case ISD::OR:
11758     FPOpcode = ISD::FABS;
11759     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
11760     break;
11761   default:
11762     return SDValue();
11763   }
11764 
11765   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
11766   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
11767   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
11768   //   fneg (fabs X)
11769   SDValue LogicOp0 = N0.getOperand(0);
11770   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
11771   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
11772       LogicOp0.getOpcode() == ISD::BITCAST &&
11773       LogicOp0.getOperand(0).getValueType() == VT) {
11774     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
11775     NumFPLogicOpsConv++;
11776     if (N0.getOpcode() == ISD::OR)
11777       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
11778     return FPOp;
11779   }
11780 
11781   return SDValue();
11782 }
11783 
/// Combine a BITCAST node: constant-fold it, collapse bitcast chains, turn a
/// bitcast-of-load into a load of the new type, and rewrite FP sign-bit
/// idioms (fneg/fabs/fcopysign) as integer logic where beneficial.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // bitcast(undef) is undef of the destination type.
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize types, unless both types are integer and the
  // scalar type is legal. Only do this before legalize ops, since the target
  // maybe depending on the bitcast.
  // First check to see if this is all constant.
  // TODO: Support FP bitcasts after legalize types.
  if (VT.isVector() &&
      (!LegalTypes ||
       (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
        TLI.isTypeLegal(VT.getVectorElementType()))) &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      cast<BuildVectorSDNode>(N0)->isConstant())
    return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
                                             VT.getVectorElementType());

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // a fp -> int or int -> conversion and that the resulting operation will
    // be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT))) {
      SDValue C = DAG.getBitcast(VT, N0);
      // Only report progress if getNode produced a different node.
      if (C.getNode() != N)
        return C;
    }
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      // If the load is volatile, we only want to change the load type if the
      // resulting load is legal. Otherwise we might increase the number of
      // memory accesses. We don't care if the original type was legal or not
      // as we assume software couldn't rely on the number of accesses of an
      // illegal type.
      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
       TLI.isOperationLegal(ISD::LOAD, VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);

    if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
                                    *LN0->getMemOperand())) {
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), LN0->getAlign(),
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      // Redirect chain users of the old load to the new load's chain.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  // Try turning integer sign-bit logic back into fabs/fneg on the FP side.
  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    // ppc_fp128 is two doubles; the sign bit lives in the high double, so the
    // flip mask must be built per-64-bit-half with BUILD_PAIR.
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      SDValue SignBit = DAG.getConstant(
          APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        // For fabs, only flip the sign bit when it is currently set; extract
        // the high half and mask out its sign bit to form the flip value.
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      // ppc_fp128 special case: flip cst's sign bit only where it differs
      // from x's, using the high 64-bit half of (cst ^ x).
      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      // Generic case: take the sign bit from x and all other bits from cst.
      APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    // FIXME: If either input vector is bitcast, try to convert the shuffle to
    // the result type of this bitcast. This would eliminate at least one
    // bitcast. See the transform in InstCombine.
    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    if (!(SV0 && SV1))
      return SDValue();

    // The new type has MaskScale times as many (narrower) elements, so each
    // original mask entry expands into MaskScale consecutive entries.
    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    SDValue LegalShuffle =
        TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
    if (LegalShuffle)
      return LegalShuffle;
  }

  return SDValue();
}
12031 
12032 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
12033   EVT VT = N->getValueType(0);
12034   return CombineConsecutiveLoads(N, VT);
12035 }
12036 
12037 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
12038   SDValue N0 = N->getOperand(0);
12039 
12040   // (freeze (freeze x)) -> (freeze x)
12041   if (N0.getOpcode() == ISD::FREEZE)
12042     return N0;
12043 
12044   // If the input is a constant, return it.
12045   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0))
12046     return N0;
12047 
12048   return SDValue();
12049 }
12050 
12051 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
12052 /// operands. DstEltVT indicates the destination element value type.
12053 SDValue DAGCombiner::
12054 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
12055   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
12056 
12057   // If this is already the right type, we're done.
12058   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
12059 
12060   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
12061   unsigned DstBitSize = DstEltVT.getSizeInBits();
12062 
12063   // If this is a conversion of N elements of one type to N elements of another
12064   // type, convert each element.  This handles FP<->INT cases.
12065   if (SrcBitSize == DstBitSize) {
12066     SmallVector<SDValue, 8> Ops;
12067     for (SDValue Op : BV->op_values()) {
12068       // If the vector element type is not legal, the BUILD_VECTOR operands
12069       // are promoted and implicitly truncated.  Make that explicit here.
12070       if (Op.getValueType() != SrcEltVT)
12071         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
12072       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
12073       AddToWorklist(Ops.back().getNode());
12074     }
12075     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12076                               BV->getValueType(0).getVectorNumElements());
12077     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
12078   }
12079 
12080   // Otherwise, we're growing or shrinking the elements.  To avoid having to
12081   // handle annoying details of growing/shrinking FP values, we convert them to
12082   // int first.
12083   if (SrcEltVT.isFloatingPoint()) {
12084     // Convert the input float vector to a int vector where the elements are the
12085     // same sizes.
12086     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
12087     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
12088     SrcEltVT = IntVT;
12089   }
12090 
12091   // Now we know the input is an integer vector.  If the output is a FP type,
12092   // convert to integer first, then to FP of the right size.
12093   if (DstEltVT.isFloatingPoint()) {
12094     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
12095     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
12096 
12097     // Next, convert to FP elements of the same size.
12098     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
12099   }
12100 
12101   SDLoc DL(BV);
12102 
12103   // Okay, we know the src/dst types are both integers of differing types.
12104   // Handling growing first.
12105   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
12106   if (SrcBitSize < DstBitSize) {
12107     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
12108 
12109     SmallVector<SDValue, 8> Ops;
12110     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
12111          i += NumInputsPerOutput) {
12112       bool isLE = DAG.getDataLayout().isLittleEndian();
12113       APInt NewBits = APInt(DstBitSize, 0);
12114       bool EltIsUndef = true;
12115       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
12116         // Shift the previously computed bits over.
12117         NewBits <<= SrcBitSize;
12118         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
12119         if (Op.isUndef()) continue;
12120         EltIsUndef = false;
12121 
12122         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
12123                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
12124       }
12125 
12126       if (EltIsUndef)
12127         Ops.push_back(DAG.getUNDEF(DstEltVT));
12128       else
12129         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
12130     }
12131 
12132     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
12133     return DAG.getBuildVector(VT, DL, Ops);
12134   }
12135 
12136   // Finally, this must be the case where we are shrinking elements: each input
12137   // turns into multiple outputs.
12138   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
12139   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12140                             NumOutputsPerInput*BV->getNumOperands());
12141   SmallVector<SDValue, 8> Ops;
12142 
12143   for (const SDValue &Op : BV->op_values()) {
12144     if (Op.isUndef()) {
12145       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
12146       continue;
12147     }
12148 
12149     APInt OpVal = cast<ConstantSDNode>(Op)->
12150                   getAPIntValue().zextOrTrunc(SrcBitSize);
12151 
12152     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
12153       APInt ThisVal = OpVal.trunc(DstBitSize);
12154       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
12155       OpVal.lshrInPlace(DstBitSize);
12156     }
12157 
12158     // For big endian targets, swap the order of the pieces of each element.
12159     if (DAG.getDataLayout().isBigEndian())
12160       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
12161   }
12162 
12163   return DAG.getBuildVector(VT, DL, Ops);
12164 }
12165 
12166 static bool isContractable(SDNode *N) {
12167   SDNodeFlags F = N->getFlags();
12168   return F.hasAllowContract() || F.hasAllowReassociation();
12169 }
12170 
12171 /// Try to perform FMA combining on a given FADD node.
12172 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
12173   SDValue N0 = N->getOperand(0);
12174   SDValue N1 = N->getOperand(1);
12175   EVT VT = N->getValueType(0);
12176   SDLoc SL(N);
12177 
12178   const TargetOptions &Options = DAG.getTarget().Options;
12179 
12180   // Floating-point multiply-add with intermediate rounding.
12181   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
12182 
12183   // Floating-point multiply-add without intermediate rounding.
12184   bool HasFMA =
12185       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12186       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12187 
12188   // No valid opcode, do not combine.
12189   if (!HasFMAD && !HasFMA)
12190     return SDValue();
12191 
12192   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
12193   bool CanReassociate =
12194       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
12195   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
12196                               CanFuse || HasFMAD);
12197   // If the addition is not contractable, do not combine.
12198   if (!AllowFusionGlobally && !isContractable(N))
12199     return SDValue();
12200 
12201   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
12202     return SDValue();
12203 
12204   // Always prefer FMAD to FMA for precision.
12205   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12206   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12207 
12208   // Is the node an FMUL and contractable either due to global flags or
12209   // SDNodeFlags.
12210   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12211     if (N.getOpcode() != ISD::FMUL)
12212       return false;
12213     return AllowFusionGlobally || isContractable(N.getNode());
12214   };
12215   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
12216   // prefer to fold the multiply with fewer uses.
12217   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
12218     if (N0.getNode()->use_size() > N1.getNode()->use_size())
12219       std::swap(N0, N1);
12220   }
12221 
12222   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
12223   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
12224     return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
12225                        N0.getOperand(1), N1);
12226   }
12227 
12228   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
12229   // Note: Commutes FADD operands.
12230   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
12231     return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
12232                        N1.getOperand(1), N0);
12233   }
12234 
12235   // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
12236   // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
12237   // This requires reassociation because it changes the order of operations.
12238   SDValue FMA, E;
12239   if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
12240       N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
12241       N0.getOperand(2).hasOneUse()) {
12242     FMA = N0;
12243     E = N1;
12244   } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
12245              N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
12246              N1.getOperand(2).hasOneUse()) {
12247     FMA = N1;
12248     E = N0;
12249   }
12250   if (FMA && E) {
12251     SDValue A = FMA.getOperand(0);
12252     SDValue B = FMA.getOperand(1);
12253     SDValue C = FMA.getOperand(2).getOperand(0);
12254     SDValue D = FMA.getOperand(2).getOperand(1);
12255     SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
12256     return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
12257   }
12258 
12259   // Look through FP_EXTEND nodes to do more combining.
12260 
12261   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
12262   if (N0.getOpcode() == ISD::FP_EXTEND) {
12263     SDValue N00 = N0.getOperand(0);
12264     if (isContractableFMUL(N00) &&
12265         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12266                             N00.getValueType())) {
12267       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12268                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
12269                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
12270                          N1);
12271     }
12272   }
12273 
12274   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
12275   // Note: Commutes FADD operands.
12276   if (N1.getOpcode() == ISD::FP_EXTEND) {
12277     SDValue N10 = N1.getOperand(0);
12278     if (isContractableFMUL(N10) &&
12279         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12280                             N10.getValueType())) {
12281       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12282                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
12283                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
12284                          N0);
12285     }
12286   }
12287 
12288   // More folding opportunities when target permits.
12289   if (Aggressive) {
12290     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
12291     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
12292     auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
12293                                     SDValue Z) {
12294       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
12295                          DAG.getNode(PreferredFusedOpcode, SL, VT,
12296                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12297                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
12298                                      Z));
12299     };
12300     if (N0.getOpcode() == PreferredFusedOpcode) {
12301       SDValue N02 = N0.getOperand(2);
12302       if (N02.getOpcode() == ISD::FP_EXTEND) {
12303         SDValue N020 = N02.getOperand(0);
12304         if (isContractableFMUL(N020) &&
12305             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12306                                 N020.getValueType())) {
12307           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
12308                                       N020.getOperand(0), N020.getOperand(1),
12309                                       N1);
12310         }
12311       }
12312     }
12313 
12314     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
12315     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
12316     // FIXME: This turns two single-precision and one double-precision
12317     // operation into two double-precision operations, which might not be
12318     // interesting for all targets, especially GPUs.
12319     auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
12320                                     SDValue Z) {
12321       return DAG.getNode(
12322           PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
12323           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
12324           DAG.getNode(PreferredFusedOpcode, SL, VT,
12325                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12326                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
12327     };
12328     if (N0.getOpcode() == ISD::FP_EXTEND) {
12329       SDValue N00 = N0.getOperand(0);
12330       if (N00.getOpcode() == PreferredFusedOpcode) {
12331         SDValue N002 = N00.getOperand(2);
12332         if (isContractableFMUL(N002) &&
12333             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12334                                 N00.getValueType())) {
12335           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
12336                                       N002.getOperand(0), N002.getOperand(1),
12337                                       N1);
12338         }
12339       }
12340     }
12341 
12342     // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
12343     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
12344     if (N1.getOpcode() == PreferredFusedOpcode) {
12345       SDValue N12 = N1.getOperand(2);
12346       if (N12.getOpcode() == ISD::FP_EXTEND) {
12347         SDValue N120 = N12.getOperand(0);
12348         if (isContractableFMUL(N120) &&
12349             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12350                                 N120.getValueType())) {
12351           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
12352                                       N120.getOperand(0), N120.getOperand(1),
12353                                       N0);
12354         }
12355       }
12356     }
12357 
12358     // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
12359     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
12360     // FIXME: This turns two single-precision and one double-precision
12361     // operation into two double-precision operations, which might not be
12362     // interesting for all targets, especially GPUs.
12363     if (N1.getOpcode() == ISD::FP_EXTEND) {
12364       SDValue N10 = N1.getOperand(0);
12365       if (N10.getOpcode() == PreferredFusedOpcode) {
12366         SDValue N102 = N10.getOperand(2);
12367         if (isContractableFMUL(N102) &&
12368             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12369                                 N10.getValueType())) {
12370           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
12371                                       N102.getOperand(0), N102.getOperand(1),
12372                                       N0);
12373         }
12374       }
12375     }
12376   }
12377 
12378   return SDValue();
12379 }
12380 
/// Try to perform FMA combining on a given FSUB node.
/// Rewrites patterns such as (fsub (fmul x, y), z) into fused multiply-add
/// (FMA/FMAD) nodes, possibly looking through FNEG and FP_EXTEND, when the
/// target and the FP contraction rules permit it. Returns the replacement
/// value, or an empty SDValue if no combine applies.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  const SDNodeFlags Flags = N->getFlags();
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);

  // If the subtraction is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Let the machine combiner form the FMAs if the target prefers that.
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };

  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
    if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
                         XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
    }
    return SDValue();
  };

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
    if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
                         YZ.getOperand(1), X);
    }
    return SDValue();
  };

  // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
      (N0.getNode()->use_size() > N1.getNode()->use_size())) {
    // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
    if (SDValue V = tryToFoldXSubYZ(N0, N1))
      return V;
    // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
    if (SDValue V = tryToFoldXYSubZ(N0, N1))
      return V;
  } else {
    // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
    if (SDValue V = tryToFoldXYSubZ(N0, N1))
      return V;
    // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
    if (SDValue V = tryToFoldXSubYZ(N0, N1))
      return V;
  }

  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
      (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    SDValue N00 = N0.getOperand(0).getOperand(0);
    SDValue N01 = N0.getOperand(0).getOperand(1);
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
                       DAG.getNode(ISD::FNEG, SL, VT, N1));
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fsub (fpext (fmul x, y)), z)
  //   -> (fma (fpext x), (fpext y), (fneg z))
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                            N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
                         DAG.getNode(ISD::FNEG, SL, VT, N1));
    }
  }

  // fold (fsub x, (fpext (fmul y, z)))
  //   -> (fma (fneg (fpext y)), (fpext z), x)
  // Note: Commutes FSUB operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                            N10.getValueType())) {
      return DAG.getNode(
          PreferredFusedOpcode, SL, VT,
          DAG.getNode(ISD::FNEG, SL, VT,
                      DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
    }
  }

  // fold (fsub (fpext (fneg (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FNEG) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                              N00.getValueType())) {
        return DAG.getNode(
            ISD::FNEG, SL, VT,
            DAG.getNode(PreferredFusedOpcode, SL, VT,
                        DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
                        DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
                        N1));
      }
    }
  }

  // fold (fsub (fneg (fpext (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FNEG) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FP_EXTEND) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                              N000.getValueType())) {
        return DAG.getNode(
            ISD::FNEG, SL, VT,
            DAG.getNode(PreferredFusedOpcode, SL, VT,
                        DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
                        DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
                        N1));
      }
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
    if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
        N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
                         N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     DAG.getNode(ISD::FNEG, SL, VT, N1)));
    }

    // fold (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    // Distributing the negation requires nsz (no signed zeros).
    if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N1.getOperand(2)) &&
        N1->hasOneUse() && NoSignedZero) {
      SDValue N20 = N1.getOperand(2).getOperand(0);
      SDValue N21 = N1.getOperand(2).getOperand(1);
      return DAG.getNode(
          PreferredFusedOpcode, SL, VT,
          DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
          DAG.getNode(PreferredFusedOpcode, SL, VT,
                      DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
    }


    // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
    if (N0.getOpcode() == PreferredFusedOpcode &&
        N0->hasOneUse()) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N020.getValueType())) {
          return DAG.getNode(
              PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
              DAG.getNode(
                  PreferredFusedOpcode, SL, VT,
                  DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
                  DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
                  DAG.getNode(ISD::FNEG, SL, VT, N1)));
        }
      }
    }

    // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y),
    //           (fma (fpext u), (fpext v), (fneg z)))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N00.getValueType())) {
          return DAG.getNode(
              PreferredFusedOpcode, SL, VT,
              DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
              DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
              DAG.getNode(
                  PreferredFusedOpcode, SL, VT,
                  DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
                  DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
                  DAG.getNode(ISD::FNEG, SL, VT, N1)));
        }
      }
    }

    // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
    //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
        N1->hasOneUse()) {
      SDValue N120 = N1.getOperand(2).getOperand(0);
      if (isContractableFMUL(N120) &&
          TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                              N120.getValueType())) {
        SDValue N1200 = N120.getOperand(0);
        SDValue N1201 = N120.getOperand(1);
        return DAG.getNode(
            PreferredFusedOpcode, SL, VT,
            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
            DAG.getNode(PreferredFusedOpcode, SL, VT,
                        DAG.getNode(ISD::FNEG, SL, VT,
                                    DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
                        DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
      }
    }

    // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
    //   -> (fma (fneg (fpext y)), (fpext z),
    //           (fma (fneg (fpext u)), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND &&
        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
      SDValue CvtSrc = N1.getOperand(0);
      SDValue N100 = CvtSrc.getOperand(0);
      SDValue N101 = CvtSrc.getOperand(1);
      SDValue N102 = CvtSrc.getOperand(2);
      if (isContractableFMUL(N102) &&
          TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                              CvtSrc.getValueType())) {
        SDValue N1020 = N102.getOperand(0);
        SDValue N1021 = N102.getOperand(1);
        return DAG.getNode(
            PreferredFusedOpcode, SL, VT,
            DAG.getNode(ISD::FNEG, SL, VT,
                        DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
            DAG.getNode(PreferredFusedOpcode, SL, VT,
                        DAG.getNode(ISD::FNEG, SL, VT,
                                    DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
                        DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
      }
    }
  }

  // No combine matched.
  return SDValue();
}
12683 
/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition).
/// Returns the replacement FMA/FMAD value, or an empty SDValue if no combine
/// applies. Only runs under no-infs FP math, since the intermediate multiply
/// would produce a NaN for x == 0, y == inf.
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  if (!Options.NoInfsFPMath)
    return SDValue();

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = Options.UnsafeFPMath &&
                 (LegalOperations && TLI.isFMADLegal(DAG, N));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
  // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
  auto FuseFADD = [&](SDValue X, SDValue Y) {
    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
      // Second argument 'true' also matches constant splat vectors.
      if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y);
        if (C->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y));
      }
    }
    return SDValue();
  };

  // Try both commutations of the FMUL operands.
  if (SDValue FMA = FuseFADD(N0, N1))
    return FMA;
  if (SDValue FMA = FuseFADD(N1, N0))
    return FMA;

  // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
  // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
  // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
  // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
  auto FuseFSUB = [&](SDValue X, SDValue Y) {
    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
      // Constant on the left-hand side of the FSUB: (fsub C, x1).
      if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
        if (C0->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             Y);
        if (C0->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y));
      }
      // Constant on the right-hand side of the FSUB: (fsub x0, C).
      if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C1->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y));
        if (C1->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y);
      }
    }
    return SDValue();
  };

  // Try both commutations of the FMUL operands.
  if (SDValue FMA = FuseFSUB(N0, N1))
    return FMA;
  if (SDValue FMA = FuseFSUB(N1, N0))
    return FMA;

  return SDValue();
}
12777 
/// Combine an FADD node: constant folding, canonicalization, algebraic
/// simplifications guarded by fast-math flags, and finally FMA formation.
/// Returns the replacement value, or an empty SDValue if nothing applies.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();
  // Propagate N's flags onto any node created below.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0);

  // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
  if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
      return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
    if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
            N1, DAG, LegalOperations, ForCodeSize))
      return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
    if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
            N0, DAG, LegalOperations, ForCodeSize))
      return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);

  // Matches a one-use (fmul B, -2.0) (scalar or splat constant).
  auto isFMulNegTwo = [](SDValue FMul) {
    if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
      return false;
    auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
    return C && C->isExactlyValue(-2.0);
  };

  // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N0)) {
    SDValue B = N0.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
    return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
  }
  // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N1)) {
    SDValue B = N1.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
    return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
  }

  // No FP constant should be created after legalization as Instruction
  // Selection pass has a hard time dealing with FP constants.
  bool AllowNewConst = (Level < AfterLegalizeDAG);

  // If nnan is enabled, fold lots of things.
  if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);
  }

  // If 'unsafe math' or reassoc and nsz, fold lots of things.
  // TODO: break out portions of the transformations below for which Unsafe is
  //       considered and which do not require both nsz and reassoc
  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
      AllowNewConst) {
    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
    if (N1CFP && N0.getOpcode() == ISD::FADD &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
    }

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
        }
      }

      if (N0.getOpcode() == ISD::FADD) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT, N1,
                             DAG.getConstantFP(3.0, DL, VT));
        }
      }

      if (N1.getOpcode() == ISD::FADD) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT, N0,
                             DAG.getConstantFP(3.0, DL, VT));
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
                           DAG.getConstantFP(4.0, DL, VT));
      }
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if (SDValue Fused = visitFADDForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }
  return SDValue();
}
12958 
12959 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
12960   SDValue Chain = N->getOperand(0);
12961   SDValue N0 = N->getOperand(1);
12962   SDValue N1 = N->getOperand(2);
12963   EVT VT = N->getValueType(0);
12964   EVT ChainVT = N->getValueType(1);
12965   SDLoc DL(N);
12966   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
12967 
12968   // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
12969   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
12970     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
12971             N1, DAG, LegalOperations, ForCodeSize)) {
12972       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
12973                          {Chain, N0, NegN1});
12974     }
12975 
12976   // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
12977   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
12978     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
12979             N0, DAG, LegalOperations, ForCodeSize)) {
12980       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
12981                          {Chain, N1, NegN0});
12982     }
12983   return SDValue();
12984 }
12985 
// Try to simplify an FSUB node. Folds are attempted in source order and the
// first successful one wins; returns an empty SDValue when nothing applies.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Splat-aware FP-constant queries (undef lanes allowed in vector splats).
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();
  // Nodes created below inherit N's fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fsub c1, c2) -> c1-c2
  // (Re-emitting the node with two constant operands triggers the
  // constant folder.)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // (fsub A, 0) -> A
  // Subtracting -0.0 is only elided when signed zeros can be ignored:
  // (-0.0) - (-0.0) is +0.0, but the fold would keep -0.0.
  if (N1CFP && N1CFP->isZero()) {
    if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
        Flags.hasNoSignedZeros()) {
      return N0;
    }
  }

  if (N0 == N1) {
    // (fsub x, x) -> 0.0
    // Needs no-NaNs: NaN - NaN is NaN, not 0.0.
    if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
      return DAG.getConstantFP(0.0f, DL, VT);
  }

  // (fsub -0.0, N1) -> -N1
  // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
  //       FSUB does not specify the sign bit of a NaN. Also note that for
  //       the same reason, the inverse transform is not safe, unless fast math
  //       flags are in play.
  if (N0CFP && N0CFP->isZero()) {
    if (N0CFP->isNegative() ||
        (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
      // Prefer a strictly-cheaper negated form of N1 when one exists.
      if (SDValue NegN1 =
              TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
        return NegN1;
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, DL, VT, N1);
    }
  }

  // Reassociation folds; require reassoc plus no-signed-zeros.
  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
      N1.getOpcode() == ISD::FADD) {
    // X - (X + Y) -> -Y
    if (N0 == N1->getOperand(0))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
    // X - (Y + X) -> -Y
    if (N0 == N1->getOperand(1))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
  }

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (SDValue NegN1 =
          TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
    return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);

  // FSUB -> FMA combines:
  if (SDValue Fused = visitFSUBForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
13066 
// Try to simplify an FMUL node: constant folding, RHS canonicalization,
// reassociation of constant chains, strength reduction (x*2 -> x+x,
// x*-1 -> -x), double-negation removal, and a select-based fabs pattern,
// followed by FMA formation. First matching fold wins.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Splat-aware FP-constant queries (undef lanes allowed in vector splats).
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();
  // Nodes created below inherit N's fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Reassociation folds; may change rounding, so gated on reassoc/unsafe.
  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
    if (isConstantFPBuildVectorOrConstantFP(N1) &&
        N0.getOpcode() == ISD::FMUL) {
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      // Avoid an infinite loop by making sure that N00 is not a constant
      // (the inner multiply has not been constant folded yet).
      if (isConstantFPBuildVectorOrConstantFP(N01) &&
          !isConstantFPBuildVectorOrConstantFP(N00)) {
        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
        return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
      }
    }

    // Match a special-case: we convert X * 2.0 into fadd.
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
        N0.getOperand(0) == N0.getOperand(1)) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // -N0 * -N1 --> N0 * N1
  // Only profitable when at least one negation is strictly cheaper.
  TargetLowering::NegatibleCost CostN0 =
      TargetLowering::NegatibleCost::Expensive;
  TargetLowering::NegatibleCost CostN1 =
      TargetLowering::NegatibleCost::Expensive;
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  SDValue NegN1 =
      TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
  if (NegN0 && NegN1 &&
      (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
       CostN1 == TargetLowering::NegatibleCost::Cheaper))
    return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    // Normalize so Select holds the select and X the other operand.
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    // Require the select condition to be a comparison of X against 0.0.
    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        // "less-than" is handled by swapping the arms and falling through
        // to the "greater-than" logic.
        std::swap(TrueOpnd, FalseOpnd);
        LLVM_FALLTHROUGH;
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                   DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
13202 
// Try to simplify an FMA node. Identity folds (operand == 1.0/-1.0) are
// unconditional; zero-operand and constant-reassociation folds are gated
// on unsafe/contractable math since they discard NaN/Inf/rounding effects.
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  // Scalar-only constant queries (not splat-aware, unlike visitFMUL).
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  // FMA nodes have flags that propagate to the created nodes.
  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);

  // Constant fold FMA.
  // (Re-emitting the node with all-constant operands triggers the
  // constant folder.)
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  // (-N0 * -N1) + N2 --> (N0 * N1) + N2
  // Only profitable when at least one negation is strictly cheaper.
  TargetLowering::NegatibleCost CostN0 =
      TargetLowering::NegatibleCost::Expensive;
  TargetLowering::NegatibleCost CostN1 =
      TargetLowering::NegatibleCost::Expensive;
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  SDValue NegN1 =
      TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
  if (NegN0 && NegN1 &&
      (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
       CostN1 == TargetLowering::NegatibleCost::Cheaper))
    return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);

  // (fma 0, x, y) -> y and (fma x, 0, y) -> y; unsafe because it drops
  // any NaN/Inf contribution of the multiply.
  if (UnsafeFPMath) {
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }

  // (fma 1, x, y) -> (fadd x, y); (fma x, 1, y) -> (fadd x, y)
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  if (UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    if (N0.getOpcode() == ISD::FMUL &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
                         N2);
    }
  }

  if (N1CFP) {
    // (fma x, 1, y) -> (fadd x, y)
    // NOTE(review): appears unreachable — the same fold already returned
    // above; kept byte-identical.
    if (N1CFP->isExactlyValue(1.0))
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    // (fma x, -1, y) -> (fadd (fneg x), y)
    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }

    // fma (fneg x), K, y -> fma x, -K, y
    // Only when the negated constant is legal or the original immediate
    // was not cheaply materializable anyway.
    if (N0.getOpcode() == ISD::FNEG &&
        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
                                              ForCodeSize)))) {
      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
    }
  }

  if (UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(
          ISD::FMUL, DL, VT, N0,
          DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(
          ISD::FMUL, DL, VT, N0,
          DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
    }
  }

  // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
  // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
  if (!TLI.isFNegFree(VT))
    if (SDValue Neg = TLI.getCheaperNegatedExpression(
            SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
      return DAG.getNode(ISD::FNEG, DL, VT, Neg);
  return SDValue();
}
13320 
13321 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13322 // reciprocal.
13323 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
13324 // Notice that this is not always beneficial. One reason is different targets
13325 // may have different costs for FDIV and FMUL, so sometimes the cost of two
13326 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
13327 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // TODO: Limit this transform based on optsize/minsize - it always creates at
  //       least 1 extra instruction. But the perf win may be substantial enough
  //       that only minsize should restrict this.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags Flags = N->getFlags();
  // Skip entirely after legalization, or when neither global unsafe math
  // nor the node's 'arcp' flag permits reciprocal formation.
  if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
    return SDValue();

  // Skip if current node is a reciprocal/fneg-reciprocal.
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
  if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  unsigned MinUses = TLI.combineRepeatedFPDivisors();

  // For splat vectors, scale the number of uses by the splat factor. If we can
  // convert the division into a scalar op, that will likely be much faster.
  unsigned NumElts = 1;
  EVT VT = N->getValueType(0);
  if (VT.isVector() && DAG.isSplatValue(N1))
    NumElts = VT.getVectorNumElements();

  if (!MinUses || (N1->use_size() * NumElts) < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
      // (U->getOperand(1) == N1 here, so this tests whether the shared
      // divisor is itself an FSQRT of the dividend.)
      if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
          U->getOperand(0) == U->getOperand(1).getOperand(0) &&
          U->getFlags().hasAllowReassociation() &&
          U->getFlags().hasNoSignedZeros())
        continue;

      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if ((Users.size() * NumElts) < MinUses)
    return SDValue();

  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  // Every eligible user (including N itself) is replaced via CombineTo.
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  return SDValue(N, 0);  // N was replaced.
}
13400 
// Try to simplify an FDIV node: constant folding, repeated-divisor
// reciprocal sharing, divide-by-constant -> multiply-by-reciprocal,
// rsqrt/reciprocal estimate formation, and double-negation removal.
// First matching fold wins.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Scalar-only constant queries (not splat-aware).
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();
  // Nodes created below inherit N's fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (SDValue V = combineRepeatedFPDivisors(N))
    return V;

  // Reciprocal-based folds; may change rounding, hence the 'arcp' gate.
  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT));
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // X / sqrt(Z) --> X * rsqrt(Z)
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // X / fpext(sqrt(Z)) --> X * fpext(rsqrt(Z))
      if (SDValue RV =
              buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // X / fpround(sqrt(Z)) --> X * fpround(rsqrt(Z))
      if (SDValue RV =
              buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      // Sqrt stays null unless one FMUL operand is an FSQRT.
      SDValue Sqrt, Y;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        Sqrt = N1.getOperand(0);
        Y = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        Sqrt = N1.getOperand(1);
        Y = N1.getOperand(0);
      }
      if (Sqrt.getNode()) {
        // If the other multiply operand is known positive, pull it into the
        // sqrt. That will eliminate the division if we convert to an estimate.
        if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
            N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
          SDValue A;
          if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
            A = Y.getOperand(0);
          else if (Y == Sqrt.getOperand(0))
            A = Y;
          if (A) {
            // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
            // X / (A * sqrt(A))       --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
            SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
            SDValue AAZ =
                DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
            if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
              return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);

            // Estimate creation failed. Clean up speculatively created nodes.
            recursivelyDeleteUnusedNodes(AAZ.getNode());
          }
        }

        // We found a FSQRT, so try to make this fold:
        // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
        if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
          SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
          AddToWorklist(Div.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (Options.NoInfsFPMath || Flags.hasNoInfs())
      if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
        return RV;
  }

  // Fold X/Sqrt(X) -> Sqrt(X)
  if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
      (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
    if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
      return N1;

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  // Only profitable when at least one negation is strictly cheaper.
  TargetLowering::NegatibleCost CostN0 =
      TargetLowering::NegatibleCost::Expensive;
  TargetLowering::NegatibleCost CostN1 =
      TargetLowering::NegatibleCost::Expensive;
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  SDValue NegN1 =
      TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
  if (NegN0 && NegN1 &&
      (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
       CostN1 == TargetLowering::NegatibleCost::Cheaper))
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);

  return SDValue();
}
13544 
13545 SDValue DAGCombiner::visitFREM(SDNode *N) {
13546   SDValue N0 = N->getOperand(0);
13547   SDValue N1 = N->getOperand(1);
13548   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13549   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13550   EVT VT = N->getValueType(0);
13551   SDNodeFlags Flags = N->getFlags();
13552   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13553 
13554   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13555     return R;
13556 
13557   // fold (frem c1, c2) -> fmod(c1,c2)
13558   if (N0CFP && N1CFP)
13559     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
13560 
13561   if (SDValue NewSel = foldBinOpIntoSelect(N))
13562     return NewSel;
13563 
13564   return SDValue();
13565 }
13566 
13567 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
13568   SDNodeFlags Flags = N->getFlags();
13569   const TargetOptions &Options = DAG.getTarget().Options;
13570 
13571   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
13572   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
13573   if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
13574       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
13575     return SDValue();
13576 
13577   SDValue N0 = N->getOperand(0);
13578   if (TLI.isFsqrtCheap(N0, DAG))
13579     return SDValue();
13580 
13581   // FSQRT nodes have flags that propagate to the created nodes.
13582   // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
13583   //       transform the fdiv, we may produce a sub-optimal estimate sequence
13584   //       because the reciprocal calculation may not have to filter out a
13585   //       0.0 input.
13586   return buildSqrtEstimate(N0, Flags);
13587 }
13588 
13589 /// copysign(x, fp_extend(y)) -> copysign(x, y)
13590 /// copysign(x, fp_round(y)) -> copysign(x, y)
13591 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
13592   SDValue N1 = N->getOperand(1);
13593   if ((N1.getOpcode() == ISD::FP_EXTEND ||
13594        N1.getOpcode() == ISD::FP_ROUND)) {
13595     // Do not optimize out type conversion of f128 type yet.
13596     // For some targets like x86_64, configuration is changed to keep one f128
13597     // value in one SSE register, but instruction selection cannot handle
13598     // FCOPYSIGN on SSE registers yet.
13599     EVT N1VT = N1->getValueType(0);
13600     EVT N1Op0VT = N1->getOperand(0).getValueType();
13601     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
13602   }
13603   return false;
13604 }
13605 
13606 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
13607   SDValue N0 = N->getOperand(0);
13608   SDValue N1 = N->getOperand(1);
13609   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
13610   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
13611   EVT VT = N->getValueType(0);
13612 
13613   if (N0CFP && N1CFP) // Constant fold
13614     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
13615 
13616   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
13617     const APFloat &V = N1C->getValueAPF();
13618     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
13619     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
13620     if (!V.isNegative()) {
13621       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
13622         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13623     } else {
13624       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13625         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
13626                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
13627     }
13628   }
13629 
13630   // copysign(fabs(x), y) -> copysign(x, y)
13631   // copysign(fneg(x), y) -> copysign(x, y)
13632   // copysign(copysign(x,z), y) -> copysign(x, y)
13633   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
13634       N0.getOpcode() == ISD::FCOPYSIGN)
13635     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
13636 
13637   // copysign(x, abs(y)) -> abs(x)
13638   if (N1.getOpcode() == ISD::FABS)
13639     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13640 
13641   // copysign(x, copysign(y,z)) -> copysign(x, z)
13642   if (N1.getOpcode() == ISD::FCOPYSIGN)
13643     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
13644 
13645   // copysign(x, fp_extend(y)) -> copysign(x, y)
13646   // copysign(x, fp_round(y)) -> copysign(x, y)
13647   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
13648     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
13649 
13650   return SDValue();
13651 }
13652 
// Try to lower pow(X, C) for special constant exponents: C == 1/3 becomes a
// cbrt libcall, and C == 0.25 / 0.75 become sqrt sequences. All rewrites
// require specific fast-math flags because the edge-case results differ.
SDValue DAGCombiner::visitFPOW(SDNode *N) {
  ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
  if (!ExponentC)
    return SDValue();
  // Nodes created below inherit N's fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  // Try to convert x ** (1/3) into cube root.
  // TODO: Handle the various flavors of long double.
  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
  //       Some range near 1/3 should be fine.
  EVT VT = N->getValueType(0);
  if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
      (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
    // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
    // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf nnan afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();
    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Do not create a cbrt() libcall if the target does not have it, and do not
    // turn a pow that has lowering support into a cbrt() libcall.
    if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
        (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
         DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
      return SDValue();

    return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
  }

  // Try to convert x ** (1/4) and x ** (3/4) into square roots.
  // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
  // TODO: This could be extended (using a target hook) to handle smaller
  // power-of-2 fractional exponents.
  bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
  bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
  if (ExponentIs025 || ExponentIs075) {
    // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
    // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
    // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
    // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();

    // We only need no signed zeros for the 0.25 case.
    if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Don't double the number of libcalls. We are trying to inline fast code.
    if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
      return SDValue();

    // Assume that libcalls are the smallest code.
    // TODO: This restriction should probably be lifted for vectors.
    if (ForCodeSize)
      return SDValue();

    // pow(X, 0.25) --> sqrt(sqrt(X))
    SDLoc DL(N);
    SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
    SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
    if (ExponentIs025)
      return SqrtSqrt;
    // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
    return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
  }

  return SDValue();
}
13729 
13730 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
13731                                const TargetLowering &TLI) {
13732   // This optimization is guarded by a function attribute because it may produce
13733   // unexpected results. Ie, programs may be relying on the platform-specific
13734   // undefined behavior when the float-to-int conversion overflows.
13735   const Function &F = DAG.getMachineFunction().getFunction();
13736   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
13737   if (StrictOverflow.getValueAsString().equals("false"))
13738     return SDValue();
13739 
13740   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
13741   // replacing casts with a libcall. We also must be allowed to ignore -0.0
13742   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
13743   // conversions would return +0.0.
13744   // FIXME: We should be able to use node-level FMF here.
13745   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
13746   EVT VT = N->getValueType(0);
13747   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
13748       !DAG.getTarget().Options.NoSignedZerosFPMath)
13749     return SDValue();
13750 
13751   // fptosi/fptoui round towards zero, so converting from FP to integer and
13752   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
13753   SDValue N0 = N->getOperand(0);
13754   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
13755       N0.getOperand(0).getValueType() == VT)
13756     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
13757 
13758   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
13759       N0.getOperand(0).getValueType() == VT)
13760     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
13761 
13762   return SDValue();
13763 }
13764 
13765 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
13766   SDValue N0 = N->getOperand(0);
13767   EVT VT = N->getValueType(0);
13768   EVT OpVT = N0.getValueType();
13769 
13770   // [us]itofp(undef) = 0, because the result value is bounded.
13771   if (N0.isUndef())
13772     return DAG.getConstantFP(0.0, SDLoc(N), VT);
13773 
13774   // fold (sint_to_fp c1) -> c1fp
13775   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
13776       // ...but only if the target supports immediate floating-point values
13777       (!LegalOperations ||
13778        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
13779     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
13780 
13781   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
13782   // but UINT_TO_FP is legal on this target, try to convert.
13783   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
13784       hasOperation(ISD::UINT_TO_FP, OpVT)) {
13785     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
13786     if (DAG.SignBitIsZero(N0))
13787       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
13788   }
13789 
13790   // The next optimizations are desirable only if SELECT_CC can be lowered.
13791   // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
13792   if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
13793       !VT.isVector() &&
13794       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13795     SDLoc DL(N);
13796     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
13797                          DAG.getConstantFP(0.0, DL, VT));
13798   }
13799 
13800   // fold (sint_to_fp (zext (setcc x, y, cc))) ->
13801   //      (select (setcc x, y, cc), 1.0, 0.0)
13802   if (N0.getOpcode() == ISD::ZERO_EXTEND &&
13803       N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
13804       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13805     SDLoc DL(N);
13806     return DAG.getSelect(DL, VT, N0.getOperand(0),
13807                          DAG.getConstantFP(1.0, DL, VT),
13808                          DAG.getConstantFP(0.0, DL, VT));
13809   }
13810 
13811   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
13812     return FTrunc;
13813 
13814   return SDValue();
13815 }
13816 
13817 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
13818   SDValue N0 = N->getOperand(0);
13819   EVT VT = N->getValueType(0);
13820   EVT OpVT = N0.getValueType();
13821 
13822   // [us]itofp(undef) = 0, because the result value is bounded.
13823   if (N0.isUndef())
13824     return DAG.getConstantFP(0.0, SDLoc(N), VT);
13825 
13826   // fold (uint_to_fp c1) -> c1fp
13827   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
13828       // ...but only if the target supports immediate floating-point values
13829       (!LegalOperations ||
13830        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
13831     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
13832 
13833   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
13834   // but SINT_TO_FP is legal on this target, try to convert.
13835   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
13836       hasOperation(ISD::SINT_TO_FP, OpVT)) {
13837     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
13838     if (DAG.SignBitIsZero(N0))
13839       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
13840   }
13841 
13842   // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
13843   if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
13844       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13845     SDLoc DL(N);
13846     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
13847                          DAG.getConstantFP(0.0, DL, VT));
13848   }
13849 
13850   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
13851     return FTrunc;
13852 
13853   return SDValue();
13854 }
13855 
// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Only applies when the operand is itself an int-to-fp conversion.
  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  // A signed type spends one bit on the sign, so subtract the signedness flag
  // from the bit width to get the number of usable magnitude bits.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range, i.e. the significand carries at
  // least ActualSize bits.
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      // Widening: sign-extend only when both sides are signed; any mix
      // involving unsigned implies a non-negative value, so zero-extend.
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    // Same scalar width: the value round-trips unchanged; a bitcast keeps the
    // types consistent (e.g. for vector layouts).
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}
13897 
13898 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
13899   SDValue N0 = N->getOperand(0);
13900   EVT VT = N->getValueType(0);
13901 
13902   // fold (fp_to_sint undef) -> undef
13903   if (N0.isUndef())
13904     return DAG.getUNDEF(VT);
13905 
13906   // fold (fp_to_sint c1fp) -> c1
13907   if (isConstantFPBuildVectorOrConstantFP(N0))
13908     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
13909 
13910   return FoldIntToFPToInt(N, DAG);
13911 }
13912 
13913 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
13914   SDValue N0 = N->getOperand(0);
13915   EVT VT = N->getValueType(0);
13916 
13917   // fold (fp_to_uint undef) -> undef
13918   if (N0.isUndef())
13919     return DAG.getUNDEF(VT);
13920 
13921   // fold (fp_to_uint c1fp) -> c1
13922   if (isConstantFPBuildVectorOrConstantFP(N0))
13923     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
13924 
13925   return FoldIntToFPToInt(N, DAG);
13926 }
13927 
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    // Operand 1 of FP_ROUND is 1 when the truncation is known to be
    // value-preserving (no rounding actually occurs).
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Restricted to a single use of the copysign so we do not duplicate work.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
13983 
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    // Depending on how X's type compares to ours, re-use X directly, narrow
    // it, or widen it.
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Redirect other users of the original (narrow) load to a round of the
    // extending load, and thread through the new chain value.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
14036 
14037 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
14038   SDValue N0 = N->getOperand(0);
14039   EVT VT = N->getValueType(0);
14040 
14041   // fold (fceil c1) -> fceil(c1)
14042   if (isConstantFPBuildVectorOrConstantFP(N0))
14043     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
14044 
14045   return SDValue();
14046 }
14047 
14048 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
14049   SDValue N0 = N->getOperand(0);
14050   EVT VT = N->getValueType(0);
14051 
14052   // fold (ftrunc c1) -> ftrunc(c1)
14053   if (isConstantFPBuildVectorOrConstantFP(N0))
14054     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
14055 
14056   // fold ftrunc (known rounded int x) -> x
14057   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
14058   // likely to be generated to extract integer from a rounded floating value.
14059   switch (N0.getOpcode()) {
14060   default: break;
14061   case ISD::FRINT:
14062   case ISD::FTRUNC:
14063   case ISD::FNEARBYINT:
14064   case ISD::FFLOOR:
14065   case ISD::FCEIL:
14066     return N0;
14067   }
14068 
14069   return SDValue();
14070 }
14071 
14072 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
14073   SDValue N0 = N->getOperand(0);
14074   EVT VT = N->getValueType(0);
14075 
14076   // fold (ffloor c1) -> ffloor(c1)
14077   if (isConstantFPBuildVectorOrConstantFP(N0))
14078     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
14079 
14080   return SDValue();
14081 }
14082 
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // Nodes created while this is in scope pick up N's fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the target can produce a negated form of N0 directly (per
  // getNegatedExpression), use that instead of an explicit FNEG.
  if (SDValue NegN0 =
          TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
    return NegN0;

  // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
  // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
  // know it was called from a context with a nsz flag if the input fsub does
  // not.
  if (N0.getOpcode() == ISD::FSUB &&
      (DAG.getTarget().Options.NoSignedZerosFPMath ||
       N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
                       N0.getOperand(0));
  }

  if (SDValue Cast = foldSignChangeInBitcast(N))
    return Cast;

  return SDValue();
}
14112 
/// Shared combine for FMINNUM/FMAXNUM/FMINIMUM/FMAXIMUM. \p Op is the APFloat
/// folding routine (minnum/maxnum/minimum/maximum) matching N's opcode.
static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
                            APFloat (*Op)(const APFloat &, const APFloat &)) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  const SDNodeFlags Flags = N->getFlags();
  unsigned Opc = N->getOpcode();
  // FMINIMUM/FMAXIMUM propagate NaN operands; FMINNUM/FMAXNUM return the
  // non-NaN operand instead.
  bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
  bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  // Both operands constant: fold directly with the supplied APFloat routine.
  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
  }

  // Canonicalize to constant on RHS.
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
      !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  if (N1CFP) {
    const APFloat &AF = N1CFP->getValueAPF();

    // minnum(X, nan) -> X
    // maxnum(X, nan) -> X
    // minimum(X, nan) -> nan
    // maximum(X, nan) -> nan
    if (AF.isNaN())
      return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);

    // In the following folds, inf can be replaced with the largest finite
    // float, if the ninf flag is set.
    if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
      // minnum(X, -inf) -> -inf
      // maxnum(X, +inf) -> +inf
      // minimum(X, -inf) -> -inf if nnan
      // maximum(X, +inf) -> +inf if nnan
      if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
        return N->getOperand(1);

      // minnum(X, +inf) -> X if nnan
      // maxnum(X, -inf) -> X if nnan
      // minimum(X, +inf) -> X
      // maximum(X, -inf) -> X
      if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
        return N->getOperand(0);
    }
  }

  return SDValue();
}
14168 
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
  // Delegate to the shared min/max combiner; minnum folds the constant case.
  return visitFMinMax(DAG, N, minnum);
}
14172 
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
  // Delegate to the shared min/max combiner; maxnum folds the constant case.
  return visitFMinMax(DAG, N, maxnum);
}
14176 
SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
  // Delegate to the shared min/max combiner; minimum folds the constant case.
  return visitFMinMax(DAG, N, minimum);
}
14180 
SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
  // Delegate to the shared min/max combiner; maximum folds the constant case.
  return visitFMinMax(DAG, N, maximum);
}
14184 
14185 SDValue DAGCombiner::visitFABS(SDNode *N) {
14186   SDValue N0 = N->getOperand(0);
14187   EVT VT = N->getValueType(0);
14188 
14189   // fold (fabs c1) -> fabs(c1)
14190   if (isConstantFPBuildVectorOrConstantFP(N0))
14191     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14192 
14193   // fold (fabs (fabs x)) -> (fabs x)
14194   if (N0.getOpcode() == ISD::FABS)
14195     return N->getOperand(0);
14196 
14197   // fold (fabs (fneg x)) -> (fabs x)
14198   // fold (fabs (fcopysign x, y)) -> (fabs x)
14199   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
14200     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
14201 
14202   if (SDValue Cast = foldSignChangeInBitcast(N))
14203     return Cast;
14204 
14205   return SDValue();
14206 }
14207 
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);      // Condition.
  SDValue N2 = N->getOperand(2);      // Destination basic block.

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  if (N1.hasOneUse()) {
    // rebuildSetCC calls visitXor which may change the Chain when there is a
    // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
    HandleSDNode ChainHandle(Chain);
    if (SDValue NewN1 = rebuildSetCC(N1))
      return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
                         ChainHandle.getValue(), NewN1, N2);
  }

  return SDValue();
}
14240 
/// Try to rewrite a boolean value \p N (typically a BRCOND condition) as an
/// explicit SETCC, returning the new value or an empty SDValue.
SDValue DAGCombiner::rebuildSetCC(SDValue N) {
  if (N.getOpcode() == ISD::SRL ||
      (N.getOpcode() == ISD::TRUNCATE &&
       (N.getOperand(0).hasOneUse() &&
        N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
    if (N.getOpcode() == ISD::TRUNCATE)
      N = N.getOperand(0);

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N.getOperand(0);
    SDValue Op1 = N.getOperand(1);

    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
          SDLoc DL(N);
          return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
                              Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                              ISD::SETNE);
        }
      }
    }
  }

  // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
  // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
  if (N.getOpcode() == ISD::XOR) {
    // Because we may call this on a speculatively constructed
    // SimplifiedSetCC Node, we need to simplify this node first.
    // Ideally this should be folded into SimplifySetCC and not
    // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
    HandleSDNode XORHandle(N);
    while (N.getOpcode() == ISD::XOR) {
      SDValue Tmp = visitXOR(N.getNode());
      // No simplification done.
      if (!Tmp.getNode())
        break;
      // Returning N is a form of in-visit replacement that may have
      // invalidated N. Grab the value from the handle.
      if (Tmp.getNode() == N.getNode())
        N = XORHandle.getValue();
      else // Node simplified. Try simplifying again.
        N = Tmp;
    }

    // The XOR may have simplified to a non-XOR value; use that directly.
    if (N.getOpcode() != ISD::XOR)
      return N;

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
      if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
          Op0.getValueType() == MVT::i1) {
        // Step into the inner xor; the outer NOT flips the comparison sense.
        N = Op0;
        Op0 = N->getOperand(0);
        Op1 = N->getOperand(1);
        Equal = true;
      }

      EVT SetCCVT = N.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      // Replace the uses of XOR with SETCC
      return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
                          Equal ? ISD::SETEQ : ISD::SETNE);
    }
  }

  return SDValue();
}
14337 
14338 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
14339 //
14340 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
14341   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
14342   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
14343 
14344   // If N is a constant we could fold this into a fallthrough or unconditional
14345   // branch. However that doesn't happen very often in normal code, because
14346   // Instcombine/SimplifyCFG should have handled the available opportunities.
14347   // If we did this folding here, it would be necessary to update the
14348   // MachineBasicBlock CFG, which is awkward.
14349 
14350   // Use SimplifySetCC to simplify SETCC's.
14351   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
14352                                CondLHS, CondRHS, CC->get(), SDLoc(N),
14353                                false);
14354   if (Simp.getNode()) AddToWorklist(Simp.getNode());
14355 
14356   // fold to a simpler setcc
14357   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
14358     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14359                        N->getOperand(0), Simp.getOperand(2),
14360                        Simp.getOperand(0), Simp.getOperand(1),
14361                        N->getOperand(4));
14362 
14363   return SDValue();
14364 }
14365 
14366 /// Return true if 'Use' is a load or a store that uses N as its base pointer
14367 /// and that N may be folded in the load / store addressing mode.
14368 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
14369                                     SelectionDAG &DAG,
14370                                     const TargetLowering &TLI) {
14371   EVT VT;
14372   unsigned AS;
14373 
14374   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
14375     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
14376       return false;
14377     VT = LD->getMemoryVT();
14378     AS = LD->getAddressSpace();
14379   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
14380     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
14381       return false;
14382     VT = ST->getMemoryVT();
14383     AS = ST->getAddressSpace();
14384   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
14385     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
14386       return false;
14387     VT = LD->getMemoryVT();
14388     AS = LD->getAddressSpace();
14389   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
14390     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
14391       return false;
14392     VT = ST->getMemoryVT();
14393     AS = ST->getAddressSpace();
14394   } else
14395     return false;
14396 
14397   TargetLowering::AddrMode AM;
14398   if (N->getOpcode() == ISD::ADD) {
14399     AM.HasBaseReg = true;
14400     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
14401     if (Offset)
14402       // [reg +/- imm]
14403       AM.BaseOffs = Offset->getSExtValue();
14404     else
14405       // [reg +/- reg]
14406       AM.Scale = 1;
14407   } else if (N->getOpcode() == ISD::SUB) {
14408     AM.HasBaseReg = true;
14409     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
14410     if (Offset)
14411       // [reg +/- imm]
14412       AM.BaseOffs = -Offset->getSExtValue();
14413     else
14414       // [reg +/- reg]
14415       AM.Scale = 1;
14416   } else
14417     return false;
14418 
14419   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
14420                                    VT.getTypeForEVT(*DAG.getContext()), AS);
14421 }
14422 
/// Check whether \p N is a (masked) load or store for which the target
/// supports the pre/post-indexed forms \p Inc or \p Dec. On success, returns
/// true with \p Ptr set to the base pointer. \p IsLoad is cleared for stores
/// and \p IsMasked is set for masked ops; the caller is expected to have
/// initialized them to true / false respectively, since they are only written
/// on the branches that change them.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
                                     bool &IsLoad, bool &IsMasked, SDValue &Ptr,
                                     const TargetLowering &TLI) {
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // Already-indexed accesses cannot be combined again.
    if (LD->isIndexed())
      return false;
    EVT VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    EVT VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
      return false;
    Ptr = ST->getBasePtr();
    IsLoad = false;
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    EVT VT = LD->getMemoryVT();
    if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
        !TLI.isIndexedMaskedLoadLegal(Dec, VT))
      return false;
    Ptr = LD->getBasePtr();
    IsMasked = true;
  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    EVT VT = ST->getMemoryVT();
    if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
        !TLI.isIndexedMaskedStoreLegal(Dec, VT))
      return false;
    Ptr = ST->getBasePtr();
    IsLoad = false;
    IsMasked = true;
  } else {
    return false;
  }
  return true;
}
14465 
14466 /// Try turning a load/store into a pre-indexed load/store when the base
14467 /// pointer is an add or subtract and it has other uses besides the load/store.
14468 /// After the transformation, the new indexed load/store has effectively folded
14469 /// the add/subtract in and all of its other uses are redirected to the
14470 /// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool IsLoad = true;
  bool IsMasked = false;
  SDValue Ptr;
  // Decompose N: it must be an unindexed (masked) load/store whose memory VT
  // the target can pre-increment or pre-decrement; Ptr receives its base
  // pointer operand.
  if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
                                Ptr, TLI))
    return false;

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  // Swap so that BasePtr is the non-constant operand; this is undone again
  // below before the replacement nodes are built.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create a indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!IsLoad) {
    SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
                           : cast<StoreSDNode>(N)->getValue();

    // Would require a copy.
    if (Val == BasePtr)
      return false;

    // Would create a cycle.
    if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // Skip users that N transitively depends on; rewriting those here would
      // create a cycle.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      // Only (BasePtr +/- constant-of-matching-type) users can be rewritten;
      // any other kind of user disables the rewrite for all of them.
      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    // Check #3: folding Ptr into a node that N transitively feeds would
    // create a cycle.
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  // Check #4: every other use could fold Ptr into its own addressing mode, so
  // pre-indexing would not remove the standalone add/sub.
  if (!RealUse)
    return false;

  SDValue Result;
  if (!IsMasked) {
    if (IsLoad)
      Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
    else
      Result =
          DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
  } else {
    if (IsLoad)
      Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
                                        Offset, AM);
    else
      Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
                                         Offset, AM);
  }
  ++PreIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // An indexed load yields (value, new base, chain); an indexed store yields
  // (new base, chain). Rewire N's old results accordingly.
  if (IsLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Re-establish the (BasePtr, Offset) orientation that was in effect when
  // OtherUses was collected above.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    // Signs: an operand on the right of ISD::SUB, or an offset used by a
    // PRE_DEC form, contributes -1; everything else contributes +1.
    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}
14689 
/// Return true if it is legal (per the target) and profitable to fold
/// \p PtrUse, an add/sub of \p N's base pointer \p Ptr, into \p N as a
/// post-indexed address update. On success \p BasePtr, \p Offset and \p AM
/// describe the addressing mode selected by the target.
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
                                   SDValue &BasePtr, SDValue &Offset,
                                   ISD::MemIndexedMode &AM,
                                   SelectionDAG &DAG,
                                   const TargetLowering &TLI) {
  // The candidate must be a pointer add/sub distinct from N itself.
  if (PtrUse == N ||
      (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
    return false;

  // Ask the target to split PtrUse into BasePtr/Offset/AM for this node.
  if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
    return false;

  // Don't create a indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Don't fold when the new base would be a frame index or register node.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Visited is shared across the hasPredecessorHelper queries below so the
  // predecessor search is not repeated from scratch for every use.
  SmallPtrSet<const SDNode *, 32> Visited;
  for (SDNode *Use : BasePtr.getNode()->uses()) {
    if (Use == Ptr.getNode())
      continue;

    // No if there's a later user which could perform the index instead.
    if (isa<MemSDNode>(Use)) {
      bool IsLoad = true;
      bool IsMasked = false;
      SDValue OtherPtr;
      if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
                                   IsMasked, OtherPtr, TLI)) {
        SmallVector<const SDNode *, 2> Worklist;
        Worklist.push_back(Use);
        // If N is a predecessor of this memory op, that op executes later and
        // could take the pointer update itself; prefer leaving it to that op.
        if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
          return false;
      }
    }

    // If all the uses are load / store addresses, then don't do the
    // transformation.
    if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
      for (SDNode *UseUse : Use->uses())
        if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
          return false;
    }
  }
  return true;
}
14738 
/// Search the uses of \p N's base pointer for an add/sub that can be folded
/// into \p N as a post-indexed address update. Returns that add/sub node and
/// fills in the out-parameters on success, or nullptr if none qualifies.
static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
                                         bool &IsMasked, SDValue &Ptr,
                                         SDValue &BasePtr, SDValue &Offset,
                                         ISD::MemIndexedMode &AM,
                                         SelectionDAG &DAG,
                                         const TargetLowering &TLI) {
  // N must be a (masked) load/store the target can post-index, and its base
  // pointer must have more than one use (one of them may be the increment).
  if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
                                IsMasked, Ptr, TLI) ||
      Ptr.getNode()->hasOneUse())
    return nullptr;

  // Try turning it into a post-indexed load / store except when
  // 1) All uses are load / store ops that use it as base ptr (and
  //    it may be folded as addressing mmode).
  // 2) Op must be independent of N, i.e. Op is neither a predecessor
  //    nor a successor of N. Otherwise, if Op is folded that would
  //    create a cycle.
  for (SDNode *Op : Ptr->uses()) {
    // Check for #1.
    if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
      continue;

    // Check for #2.
    SmallPtrSet<const SDNode *, 32> Visited;
    SmallVector<const SDNode *, 8> Worklist;
    // Ptr is predecessor to both N and Op.
    Visited.insert(Ptr.getNode());
    Worklist.push_back(N);
    Worklist.push_back(Op);
    if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
        !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
      return Op;
  }
  return nullptr;
}
14774 
14775 /// Try to combine a load/store with a add/sub of the base pointer node into a
14776 /// post-indexed load/store. The transformation folded the add/subtract into the
14777 /// new indexed load/store effectively and all of its uses are redirected to the
14778 /// new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool IsLoad = true;
  bool IsMasked = false;
  SDValue Ptr;
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  // Find an add/sub of N's base pointer that can become the post-index
  // update; Op is that add/sub node.
  SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
                                         Offset, AM, DAG, TLI);
  if (!Op)
    return false;

  // Build the indexed replacement for N.
  SDValue Result;
  if (!IsMasked)
    Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
                                         Offset, AM)
                    : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
                                          BasePtr, Offset, AM);
  else
    Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
                                               BasePtr, Offset, AM)
                    : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
                                                BasePtr, Offset, AM);
  ++PostIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
             dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
             dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // An indexed load yields (value, new base, chain); an indexed store yields
  // (new base, chain). Rewire N's old results accordingly.
  if (IsLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Replace the uses of Use with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                Result.getValue(IsLoad ? 1 : 0));
  deleteAndRecombine(Op);
  return true;
}
14827 
14828 /// Return the base-pointer arithmetic from an indexed \p LD.
14829 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
14830   ISD::MemIndexedMode AM = LD->getAddressingMode();
14831   assert(AM != ISD::UNINDEXED);
14832   SDValue BP = LD->getOperand(1);
14833   SDValue Inc = LD->getOperand(2);
14834 
14835   // Some backends use TargetConstants for load offsets, but don't expect
14836   // TargetConstants in general ADD nodes. We can convert these constants into
14837   // regular Constants (if the constant is not opaque).
14838   assert((Inc.getOpcode() != ISD::TargetConstant ||
14839           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
14840          "Cannot split out indexing using opaque target constants");
14841   if (Inc.getOpcode() == ISD::TargetConstant) {
14842     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
14843     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
14844                           ConstInc->getValueType(0));
14845   }
14846 
14847   unsigned Opc =
14848       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
14849   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
14850 }
14851 
14852 static inline ElementCount numVectorEltsOrZero(EVT T) {
14853   return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
14854 }
14855 
14856 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
14857   Val = ST->getValue();
14858   EVT STType = Val.getValueType();
14859   EVT STMemType = ST->getMemoryVT();
14860   if (STType == STMemType)
14861     return true;
14862   if (isTypeLegal(STMemType))
14863     return false; // fail.
14864   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
14865       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
14866     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
14867     return true;
14868   }
14869   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
14870       STType.isInteger() && STMemType.isInteger()) {
14871     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
14872     return true;
14873   }
14874   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
14875     Val = DAG.getBitcast(STMemType, Val);
14876     return true;
14877   }
14878   return false; // fail.
14879 }
14880 
14881 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
14882   EVT LDMemType = LD->getMemoryVT();
14883   EVT LDType = LD->getValueType(0);
14884   assert(Val.getValueType() == LDMemType &&
14885          "Attempting to extend value of non-matching type");
14886   if (LDType == LDMemType)
14887     return true;
14888   if (LDMemType.isInteger() && LDType.isInteger()) {
14889     switch (LD->getExtensionType()) {
14890     case ISD::NON_EXTLOAD:
14891       Val = DAG.getBitcast(LDType, Val);
14892       return true;
14893     case ISD::EXTLOAD:
14894       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
14895       return true;
14896     case ISD::SEXTLOAD:
14897       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
14898       return true;
14899     case ISD::ZEXTLOAD:
14900       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
14901       return true;
14902     }
14903   }
14904   return false;
14905 }
14906 
/// Attempt store-to-load forwarding: when the node producing \p LD's chain is
/// a store whose stored value covers all of the loaded bits, replace the load
/// with a (possibly truncated/extended) copy of the stored value.
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
  if (OptLevel == CodeGenOpt::None || !LD->isSimple())
    return SDValue();
  // The forwarding candidate is the store producing the load's chain operand.
  SDValue Chain = LD->getOperand(0);
  StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
  // TODO: Relax this restriction for unordered atomics (see D66309)
  if (!ST || !ST->isSimple())
    return SDValue();

  EVT LDType = LD->getValueType(0);
  EVT LDMemType = LD->getMemoryVT();
  EVT STMemType = ST->getMemoryVT();
  EVT STType = ST->getValue().getValueType();

  // There are two cases to consider here:
  //  1. The store is fixed width and the load is scalable. In this case we
  //     don't know at compile time if the store completely envelops the load
  //     so we abandon the optimisation.
  //  2. The store is scalable and the load is fixed width. We could
  //     potentially support a limited number of cases here, but there has been
  //     no cost-benefit analysis to prove it's worth it.
  bool LdStScalable = LDMemType.isScalableVector();
  if (LdStScalable != STMemType.isScalableVector())
    return SDValue();

  // If we are dealing with scalable vectors on a big endian platform the
  // calculation of offsets below becomes trickier, since we do not know at
  // compile time the absolute size of the vector. Until we've done more
  // analysis on big-endian platforms it seems better to bail out for now.
  if (LdStScalable && DAG.getDataLayout().isBigEndian())
    return SDValue();

  // Both addresses must decompose to a provably equal base; Offset receives
  // the load's byte distance from the start of the stored bytes.
  BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
  BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
  int64_t Offset;
  if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
    return SDValue();

  // Normalize for Endianness. After this Offset=0 will denote that the least
  // significant bit in the loaded value maps to the least significant bit in
  // the stored value). With Offset=n (for n > 0) the loaded value starts at the
  // n:th least significant byte of the stored value.
  if (DAG.getDataLayout().isBigEndian())
    Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
              (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
                 8 -
             Offset;

  // Check that the stored value cover all bits that are loaded.
  bool STCoversLD;

  TypeSize LdMemSize = LDMemType.getSizeInBits();
  TypeSize StMemSize = STMemType.getSizeInBits();
  if (LdStScalable)
    STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
  else
    STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
                                   StMemSize.getFixedSize());

  // Replace LD with Val/Chain. An indexed load additionally needs its base
  // pointer arithmetic split off into a standalone add/sub.
  auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
    if (LD->isIndexed()) {
      // Cannot handle opaque target constants and we must respect the user's
      // request not to split indexes from loads.
      if (!canSplitIdx(LD))
        return SDValue();
      SDValue Idx = SplitIndexingFromLoad(LD);
      SDValue Ops[] = {Val, Idx, Chain};
      return CombineTo(LD, Ops, 3);
    }
    return CombineTo(LD, Val, Chain);
  };

  if (!STCoversLD)
    return SDValue();

  // Memory as copy space (potentially masked).
  if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
    // Simple case: Direct non-truncating forwarding
    if (LDType.getSizeInBits() == LdMemSize)
      return ReplaceLd(LD, ST->getValue(), Chain);
    // Can we model the truncate and extension with an and mask?
    if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
        !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
      // Mask to size of LDMemType
      auto Mask =
          DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
                                               StMemSize.getFixedSize()),
                          SDLoc(ST), STType);
      auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
      return ReplaceLd(LD, Val, Chain);
    }
  }

  // TODO: Deal with nonzero offset.
  if (LD->getBasePtr().isUndef() || Offset != 0)
    return SDValue();
  // Model necessary truncations / extenstions.
  SDValue Val;
  // Truncate Value To Stored Memory Size.
  // Single-iteration loop: 'continue' acts as a jump to the failure cleanup
  // that follows the loop.
  do {
    if (!getTruncatedStoreValue(ST, Val))
      continue;
    if (!isTypeLegal(LDMemType))
      continue;
    if (STMemType != LDMemType) {
      // TODO: Support vectors? This requires extract_subvector/bitcast.
      if (!STMemType.isVector() && !LDMemType.isVector() &&
          STMemType.isInteger() && LDMemType.isInteger())
        Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
      else
        continue;
    }
    if (!extendLoadedValueToExtension(LD, Val))
      continue;
    return ReplaceLd(LD, Val, Chain);
  } while (false);

  // On failure, cleanup dead nodes we may have created.
  if (Val->use_empty())
    deleteAndRecombine(Val.getNode());
  return SDValue();
}
15029 
/// Main combine entry point for load nodes: deletes dead loads, forwards
/// directly-stored values, refines alignment, improves the chain, and tries
/// indexed and sliced forms.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  // TODO: Allow this for unordered atomics (see D66309)
  if (LD->isSimple()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool CanSplitIdx = canSplitIdx(LD);

      // Dead loaded value: replace the value with undef and the new-base
      // result with either the split-off add/sub or undef if it too is dead.
      if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  if (auto V = ForwardStoreValueToDirectLoad(LD))
    return V;

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
    if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
      // Only adopt the larger alignment if it also holds at the memory
      // operand's offset.
      if (*Alignment > LD->getAlign() &&
          isAligned(*Alignment, LD->getSrcValueOffset())) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(LD, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
15153 
15154 namespace {
15155 
15156 /// Helper structure used to slice a load in smaller loads.
15157 /// Basically a slice is obtained from the following sequence:
15158 /// Origin = load Ty1, Base
15159 /// Shift = srl Ty1 Origin, CstTy Amount
15160 /// Inst = trunc Shift to Ty2
15161 ///
15162 /// Then, it will be rewritten into:
15163 /// Slice = load SliceTy, Base + SliceOffset
15164 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
15165 ///
15166 /// SliceTy is deduced from the number of bits that are actually used to
15167 /// build Inst.
15168 struct LoadedSlice {
  /// Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize = false;

    /// Various cost.
    unsigned Loads = 0;
    unsigned Truncates = 0;
    unsigned CrossRegisterBanksCopies = 0;
    unsigned ZExts = 0;
    unsigned Shift = 0;

    explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}

    /// Get the cost of one isolated slice.
    Cost(const LoadedSlice &LS, bool ForCodeSize)
        : ForCodeSize(ForCodeSize), Loads(1) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      // A slice whose loaded type differs from its use's type needs a zero
      // extension, unless the target considers that extension free.
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// Account for slicing gain in the current cost.
    /// Slicing provide a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    /// Accumulate another cost into this one, field by field.
    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    /// Ordering: loads and cross-bank copies dominate the comparison, except
    /// when optimizing for size, where all operation counts weigh equally.
    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };
15247 
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  SDNode *Inst;

  // The original (wide) load instruction the slice is carved out of.
  LoadSDNode *Origin;

  // The right shift amount in bits from the original load.
  // Zero when the slice starts at the least significant bit.
  unsigned Shift;

  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
15265 
15266   /// Get the bits used in a chunk of bits \p BitWidth large.
15267   /// \return Result is \p BitWidth and has used bits set to 1 and
15268   ///         not used bits set to 0.
15269   APInt getUsedBits() const {
15270     // Reproduce the trunc(lshr) sequence:
15271     // - Start from the truncated value.
15272     // - Zero extend to the desired bit width.
15273     // - Shift left.
15274     assert(Origin && "No original load to compare against.");
15275     unsigned BitWidth = Origin->getValueSizeInBits(0);
15276     assert(Inst && "This slice is not bound to an instruction");
15277     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
15278            "Extracted slice is bigger than the whole type!");
15279     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
15280     UsedBits.setAllBits();
15281     UsedBits = UsedBits.zext(BitWidth);
15282     UsedBits <<= Shift;
15283     return UsedBits;
15284   }
15285 
15286   /// Get the size of the slice to be loaded in bytes.
15287   unsigned getLoadedSize() const {
15288     unsigned SliceSize = getUsedBits().countPopulation();
15289     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
15290     return SliceSize / 8;
15291   }
15292 
15293   /// Get the type that will be loaded for this slice.
15294   /// Note: This may not be the final type for the slice.
15295   EVT getLoadedType() const {
15296     assert(DAG && "Missing context");
15297     LLVMContext &Ctxt = *DAG->getContext();
15298     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
15299   }
15300 
15301   /// Get the alignment of the load used for this slice.
15302   Align getAlign() const {
15303     Align Alignment = Origin->getAlign();
15304     uint64_t Offset = getOffsetFromBase();
15305     if (Offset != 0)
15306       Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
15307     return Alignment;
15308   }
15309 
15310   /// Check if this slice can be rewritten with legal operations.
15311   bool isLegal() const {
15312     // An invalid slice is not legal.
15313     if (!Origin || !Inst || !DAG)
15314       return false;
15315 
15316     // Offsets are for indexed load only, we do not handle that.
15317     if (!Origin->getOffset().isUndef())
15318       return false;
15319 
15320     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15321 
15322     // Check that the type is legal.
15323     EVT SliceType = getLoadedType();
15324     if (!TLI.isTypeLegal(SliceType))
15325       return false;
15326 
15327     // Check that the load is legal for this type.
15328     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
15329       return false;
15330 
15331     // Check that the offset can be computed.
15332     // 1. Check its type.
15333     EVT PtrType = Origin->getBasePtr().getValueType();
15334     if (PtrType == MVT::Untyped || PtrType.isExtended())
15335       return false;
15336 
15337     // 2. Check that it fits in the immediate.
15338     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
15339       return false;
15340 
15341     // 3. Check that the computation is legal.
15342     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
15343       return false;
15344 
15345     // Check that the zext is legal if it needs one.
15346     EVT TruncateType = Inst->getValueType(0);
15347     if (TruncateType != SliceType &&
15348         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
15349       return false;
15350 
15351     return true;
15352   }
15353 
15354   /// Get the offset in bytes of this slice in the original chunk of
15355   /// bits.
15356   /// \pre DAG != nullptr.
15357   uint64_t getOffsetFromBase() const {
15358     assert(DAG && "Missing context.");
15359     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
15360     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
15361     uint64_t Offset = Shift / 8;
15362     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
15363     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
15364            "The size of the original loaded type is not a multiple of a"
15365            " byte.");
15366     // If Offset is bigger than TySizeInBytes, it means we are loading all
15367     // zeros. This should have been optimized before in the process.
15368     assert(TySizeInBytes > Offset &&
15369            "Invalid shift amount for given loaded size");
15370     if (IsBigEndian)
15371       Offset = TySizeInBytes - Offset - getLoadedSize();
15372     return Offset;
15373   }
15374 
15375   /// Generate the sequence of instructions to load the slice
15376   /// represented by this object and redirect the uses of this slice to
15377   /// this new sequence of instructions.
15378   /// \pre this->Inst && this->Origin are valid Instructions and this
15379   /// object passed the legal check: LoadedSlice::isLegal returned true.
15380   /// \return The last instruction of the sequence used to load the slice.
15381   SDValue loadSlice() const {
15382     assert(Inst && Origin && "Unable to replace a non-existing slice.");
15383     const SDValue &OldBaseAddr = Origin->getBasePtr();
15384     SDValue BaseAddr = OldBaseAddr;
15385     // Get the offset in that chunk of bytes w.r.t. the endianness.
15386     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
15387     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
15388     if (Offset) {
15389       // BaseAddr = BaseAddr + Offset.
15390       EVT ArithType = BaseAddr.getValueType();
15391       SDLoc DL(Origin);
15392       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
15393                               DAG->getConstant(Offset, DL, ArithType));
15394     }
15395 
15396     // Create the type of the loaded slice according to its size.
15397     EVT SliceType = getLoadedType();
15398 
15399     // Create the load for the slice.
15400     SDValue LastInst =
15401         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
15402                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
15403                      Origin->getMemOperand()->getFlags());
15404     // If the final type is not the same as the loaded type, this means that
15405     // we have to pad with zero. Create a zero extend for that.
15406     EVT FinalType = Inst->getValueType(0);
15407     if (SliceType != FinalType)
15408       LastInst =
15409           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
15410     return LastInst;
15411   }
15412 
15413   /// Check if this slice can be merged with an expensive cross register
15414   /// bank copy. E.g.,
15415   /// i = load i32
15416   /// f = bitcast i32 i to float
15417   bool canMergeExpensiveCrossRegisterBankCopy() const {
15418     if (!Inst || !Inst->hasOneUse())
15419       return false;
15420     SDNode *Use = *Inst->use_begin();
15421     if (Use->getOpcode() != ISD::BITCAST)
15422       return false;
15423     assert(DAG && "Missing context");
15424     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15425     EVT ResVT = Use->getValueType(0);
15426     const TargetRegisterClass *ResRC =
15427         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
15428     const TargetRegisterClass *ArgRC =
15429         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
15430                            Use->getOperand(0)->isDivergent());
15431     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
15432       return false;
15433 
15434     // At this point, we know that we perform a cross-register-bank copy.
15435     // Check if it is expensive.
15436     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
15437     // Assume bitcasts are cheap, unless both register classes do not
15438     // explicitly share a common sub class.
15439     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
15440       return false;
15441 
15442     // Check if it will be merged with the load.
15443     // 1. Check the alignment constraint.
15444     Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
15445         ResVT.getTypeForEVT(*DAG->getContext()));
15446 
15447     if (RequiredAlignment > getAlign())
15448       return false;
15449 
15450     // 2. Check that the load is a legal operation for that type.
15451     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
15452       return false;
15453 
15454     // 3. Check that we do not have a zext in the way.
15455     if (Inst->getValueType(0) != getLoadedType())
15456       return false;
15457 
15458     return true;
15459   }
15460 };
15461 
15462 } // end anonymous namespace
15463 
15464 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
15465 /// \p UsedBits looks like 0..0 1..1 0..0.
15466 static bool areUsedBitsDense(const APInt &UsedBits) {
15467   // If all the bits are one, this is dense!
15468   if (UsedBits.isAllOnesValue())
15469     return true;
15470 
15471   // Get rid of the unused bits on the right.
15472   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
15473   // Get rid of the unused bits on the left.
15474   if (NarrowedUsedBits.countLeadingZeros())
15475     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
15476   // Check that the chunk of bits is completely used.
15477   return NarrowedUsedBits.isAllOnesValue();
15478 }
15479 
15480 /// Check whether or not \p First and \p Second are next to each other
15481 /// in memory. This means that there is no hole between the bits loaded
15482 /// by \p First and the bits loaded by \p Second.
15483 static bool areSlicesNextToEachOther(const LoadedSlice &First,
15484                                      const LoadedSlice &Second) {
15485   assert(First.Origin == Second.Origin && First.Origin &&
15486          "Unable to match different memory origins.");
15487   APInt UsedBits = First.getUsedBits();
15488   assert((UsedBits & Second.getUsedBits()) == 0 &&
15489          "Slices are not supposed to overlap.");
15490   UsedBits |= Second.getUsedBits();
15491   return areUsedBitsDense(UsedBits);
15492 }
15493 
/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  // Walk adjacent slice pairs: at the end of each iteration the current
  // slice (Second) becomes the first candidate of the next pair, unless it
  // was consumed or disqualified (Second reset to nullptr below).
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                                                           First = Second) {
    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    Align RequiredAlignment;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (First->getAlign() < RequiredAlignment)
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    // A pair was formed: the paired load replaces two loads by one, so
    // give one load back to the global cost.
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}
15553 
15554 /// Check the profitability of all involved LoadedSlice.
15555 /// Currently, it is considered profitable if there is exactly two
15556 /// involved slices (1) which are (2) next to each other in memory, and
15557 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
15558 ///
15559 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
15560 /// the elements themselves.
15561 ///
15562 /// FIXME: When the cost model will be mature enough, we can relax
15563 /// constraints (1) and (2).
15564 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
15565                                 const APInt &UsedBits, bool ForCodeSize) {
15566   unsigned NumberOfSlices = LoadedSlices.size();
15567   if (StressLoadSlicing)
15568     return NumberOfSlices > 1;
15569 
15570   // Check (1).
15571   if (NumberOfSlices != 2)
15572     return false;
15573 
15574   // Check (2).
15575   if (!areUsedBitsDense(UsedBits))
15576     return false;
15577 
15578   // Check (3).
15579   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
15580   // The original code has one big load.
15581   OrigCost.Loads = 1;
15582   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
15583     const LoadedSlice &LS = LoadedSlices[CurrSlice];
15584     // Accumulate the cost of all the slices.
15585     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
15586     GlobalSlicingCost += SliceCost;
15587 
15588     // Account as cost in the original configuration the gain obtained
15589     // with the current slices.
15590     OrigCost.addSliceGain(LS);
15591   }
15592 
15593   // If the target supports paired load, adjust the cost accordingly.
15594   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
15595   return OrigCost > GlobalSlicingCost;
15596 }
15597 
/// If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
bool DAGCombiner::SliceUpLoad(SDNode *N) {
  // Only attempted after DAG legalization, so that the per-slice
  // legality checks (LoadedSlice::isLegal) are meaningful.
  if (Level < AfterLegalizeDAG)
    return false;

  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
      !LD->getValueType(0).isInteger())
    return false;

  // The algorithm to split up a load of a scalable vector into individual
  // elements currently requires knowing the length of the loaded type,
  // so will need adjusting to work on scalable vectors.
  if (LD->getValueType(0).isScalableVector())
    return false;

  // Keep track of already used bits to detect overlapping values.
  // In that case, we will just abort the transformation.
  APInt UsedBits(LD->getValueSizeInBits(0), 0);

  SmallVector<LoadedSlice, 4> LoadedSlices;

  // Check if this load is used as several smaller chunks of bits.
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  // of computation for each trunc.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;

    SDNode *User = *UI;
    unsigned Shift = 0;

    // Check if this is a trunc(lshr): if so, record the shift amount and
    // look through the shift to its single user.
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
        isa<ConstantSDNode>(User->getOperand(1))) {
      Shift = User->getConstantOperandVal(1);
      User = *User->use_begin();
    }

    // At this point, User must be a TRUNCATE (either a direct trunc of the
    // load or the user of the lshr). Any other use defeats the slicing.
    if (User->getOpcode() != ISD::TRUNCATE)
      return false;

    // The width of the type must be a power of 2 and at least 8 bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted with a non-8-bits multiple, the slice
    // will be across several bytes. We do not support that.
    unsigned Width = User->getValueSizeInBits(0);
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return false;

    // Build the slice for this chain of computations.
    LoadedSlice LS(User, LD, Shift, &DAG);
    APInt CurrentUsedBits = LS.getUsedBits();

    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return false;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;

    // Check if the new slice would be legal.
    if (!LS.isLegal())
      return false;

    // Record the slice.
    LoadedSlices.push_back(LS);
  }

  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
    return false;

  ++SlicedLoads;

  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.

  // Prepare the argument for the new token factor for all the slices.
  SmallVector<SDValue, 8> ArgChains;
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    CombineTo(LSIt->Inst, SliceInst, true);
    // loadSlice() may have wrapped the load in a zero extend; peel it off
    // to reach the load node whose output chain we need.
    if (SliceInst.getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }

  // Tie the chains of all the new loads together and replace the chain
  // result of the original load, preserving memory ordering.
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  AddToWorklist(Chain.getNode());
  return true;
}
15705 
/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out.  If so, return the byte size being masked out
/// and the shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  // A result of (0, 0) means "no match".
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  // Number of bytes cleared by the mask; only power-of-two widths that can
  // be expressed as a legal narrow store are accepted.
  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  // For narrowing to be valid, it must be the case that the load is the
  // immediately preceding memory operation before the store.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() == ISD::TokenFactor &&
           SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use so there are no indirect dependencies.
    if (!LD->isOperandOf(Chain.getNode()))
      return Result;
  } else
    return Result; // Fail.

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}
15775 
15776 /// Check to see if IVal is something that provides a value as specified by
15777 /// MaskInfo. If so, replace the specified store with a narrower store of
15778 /// truncated IVal.
15779 static SDValue
15780 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
15781                                 SDValue IVal, StoreSDNode *St,
15782                                 DAGCombiner *DC) {
15783   unsigned NumBytes = MaskInfo.first;
15784   unsigned ByteShift = MaskInfo.second;
15785   SelectionDAG &DAG = DC->getDAG();
15786 
15787   // Check to see if IVal is all zeros in the part being masked in by the 'or'
15788   // that uses this.  If not, this is not a replacement.
15789   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
15790                                   ByteShift*8, (ByteShift+NumBytes)*8);
15791   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
15792 
15793   // Check that it is legal on the target to do this.  It is legal if the new
15794   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
15795   // legalization (and the target doesn't explicitly think this is a bad idea).
15796   MVT VT = MVT::getIntegerVT(NumBytes * 8);
15797   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15798   if (!DC->isTypeLegal(VT))
15799     return SDValue();
15800   if (St->getMemOperand() &&
15801       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15802                               *St->getMemOperand()))
15803     return SDValue();
15804 
15805   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
15806   // shifted by ByteShift and truncated down to NumBytes.
15807   if (ByteShift) {
15808     SDLoc DL(IVal);
15809     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
15810                        DAG.getConstant(ByteShift*8, DL,
15811                                     DC->getShiftAmountTy(IVal.getValueType())));
15812   }
15813 
15814   // Figure out the offset for the store and the alignment of the access.
15815   unsigned StOffset;
15816   if (DAG.getDataLayout().isLittleEndian())
15817     StOffset = ByteShift;
15818   else
15819     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
15820 
15821   SDValue Ptr = St->getBasePtr();
15822   if (StOffset) {
15823     SDLoc DL(IVal);
15824     Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
15825   }
15826 
15827   // Truncate down to the new size.
15828   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
15829 
15830   ++OpsNarrowed;
15831   return DAG
15832       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
15833                 St->getPointerInfo().getWithOffset(StOffset),
15834                 St->getOriginalAlign());
15835 }
15836 
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (!ST->isSimple())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return NewST;

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return NewST;
  }

  if (!EnableReduceLoadOpStoreWidth)
    return SDValue();

  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // Only handle the pattern where the store's chain comes straight from the
  // load, i.e., load feeds op feeds store with nothing in between.
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, invert the mask so that, as for OR/XOR, the set bits of Imm
    // are the bits the operation actually modifies.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    // An all-zero or all-one immediate leaves nothing to narrow on.
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only proceed if every modified bit fits inside the narrow window.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      // Give up if the narrowed access would be under-aligned for NewVT.
      Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
        return SDValue();

      // Build the narrowed load / op / store sequence.
      SDValue NewPtr =
          DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      // Redirect the old load's chain users to the new narrow load.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
15963 
15964 /// For a given floating point load / store pair, if the load value isn't used
15965 /// by any other operations, then consider transforming the pair to integer
15966 /// load / store operations if the target deems the transformation profitable.
15967 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
15968   StoreSDNode *ST  = cast<StoreSDNode>(N);
15969   SDValue Value = ST->getValue();
15970   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
15971       Value.hasOneUse()) {
15972     LoadSDNode *LD = cast<LoadSDNode>(Value);
15973     EVT VT = LD->getMemoryVT();
15974     if (!VT.isFloatingPoint() ||
15975         VT != ST->getMemoryVT() ||
15976         LD->isNonTemporal() ||
15977         ST->isNonTemporal() ||
15978         LD->getPointerInfo().getAddrSpace() != 0 ||
15979         ST->getPointerInfo().getAddrSpace() != 0)
15980       return SDValue();
15981 
15982     TypeSize VTSize = VT.getSizeInBits();
15983 
15984     // We don't know the size of scalable types at compile time so we cannot
15985     // create an integer of the equivalent size.
15986     if (VTSize.isScalable())
15987       return SDValue();
15988 
15989     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
15990     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
15991         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
15992         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
15993         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
15994       return SDValue();
15995 
15996     Align LDAlign = LD->getAlign();
15997     Align STAlign = ST->getAlign();
15998     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
15999     Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
16000     if (LDAlign < ABIAlign || STAlign < ABIAlign)
16001       return SDValue();
16002 
16003     SDValue NewLD =
16004         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
16005                     LD->getPointerInfo(), LDAlign);
16006 
16007     SDValue NewST =
16008         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
16009                      ST->getPointerInfo(), STAlign);
16010 
16011     AddToWorklist(NewLD.getNode());
16012     AddToWorklist(NewST.getNode());
16013     WorklistRemover DeadNodes(*this);
16014     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
16015     ++LdStFP2Int;
16016     return NewST;
16017   }
16018 
16019   return SDValue();
16020 }
16021 
16022 // This is a helper function for visitMUL to check the profitability
16023 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
16024 // MulNode is the original multiply, AddNode is (add x, c1),
16025 // and ConstNode is c2.
16026 //
16027 // If the (add x, c1) has multiple uses, we could increase
16028 // the number of adds if we make this transformation.
16029 // It would only be worth doing this if we can remove a
16030 // multiply in the process. Check for that here.
16031 // To illustrate:
16032 //     (A + c1) * c3
16033 //     (A + c2) * c3
16034 // We're checking for cases where we have common "c3 * A" expressions.
16035 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
16036                                               SDValue &AddNode,
16037                                               SDValue &ConstNode) {
16038   APInt Val;
16039 
16040   // If the add only has one use, this would be OK to do.
16041   if (AddNode.getNode()->hasOneUse())
16042     return true;
16043 
16044   // Walk all the users of the constant with which we're multiplying.
16045   for (SDNode *Use : ConstNode->uses()) {
16046     if (Use == MulNode) // This use is the one we're on right now. Skip it.
16047       continue;
16048 
16049     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
16050       SDNode *OtherOp;
16051       SDNode *MulVar = AddNode.getOperand(0).getNode();
16052 
16053       // OtherOp is what we're multiplying against the constant.
16054       if (Use->getOperand(0) == ConstNode)
16055         OtherOp = Use->getOperand(1).getNode();
16056       else
16057         OtherOp = Use->getOperand(0).getNode();
16058 
16059       // Check to see if multiply is with the same operand of our "add".
16060       //
16061       //     ConstNode  = CONST
16062       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
16063       //     ...
16064       //     AddNode  = (A + c1)  <-- MulVar is A.
16065       //         = AddNode * ConstNode   <-- current visiting instruction.
16066       //
16067       // If we make this transformation, we will have a common
16068       // multiply (ConstNode * A) that we can save.
16069       if (OtherOp == MulVar)
16070         return true;
16071 
16072       // Now check to see if a future expansion will give us a common
16073       // multiply.
16074       //
16075       //     ConstNode  = CONST
16076       //     AddNode    = (A + c1)
16077       //     ...   = AddNode * ConstNode <-- current visiting instruction.
16078       //     ...
16079       //     OtherOp = (A + c2)
16080       //     Use     = OtherOp * ConstNode <-- visiting Use.
16081       //
16082       // If we make this transformation, we will have a common
16083       // multiply (CONST * A) after we also do the same transformation
16084       // to the "t2" instruction.
16085       if (OtherOp->getOpcode() == ISD::ADD &&
16086           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
16087           OtherOp->getOperand(0).getNode() == MulVar)
16088         return true;
16089     }
16090   }
16091 
16092   // Didn't find a case where this would be profitable.
16093   return false;
16094 }
16095 
16096 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
16097                                          unsigned NumStores) {
16098   SmallVector<SDValue, 8> Chains;
16099   SmallPtrSet<const SDNode *, 8> Visited;
16100   SDLoc StoreDL(StoreNodes[0].MemNode);
16101 
16102   for (unsigned i = 0; i < NumStores; ++i) {
16103     Visited.insert(StoreNodes[i].MemNode);
16104   }
16105 
16106   // don't include nodes that are children or repeated nodes.
16107   for (unsigned i = 0; i < NumStores; ++i) {
16108     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
16109       Chains.push_back(StoreNodes[i].MemNode->getChain());
16110   }
16111 
16112   assert(Chains.size() > 0 && "Chain should have generated a chain");
16113   return DAG.getTokenFactor(StoreDL, Chains);
16114 }
16115 
/// Replace NumStores adjacent stores (of constants, or of extracted vector
/// elements) with one wide store. If UseVector, the merged value is built as
/// a BUILD_VECTOR/CONCAT_VECTORS of type StoreTy; otherwise the constants are
/// bit-packed into one wide integer. If UseTrunc, the wide value must be a
/// constant and is emitted as a truncating store of the promoted type.
/// Returns true if the merge was performed.
bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  // The latest Node in the DAG.
  SDLoc DL(StoreNodes[0].MemNode);

  TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
  // Total width of the merged value in bits.
  unsigned SizeInBits = NumStores * ElementSizeBits;
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  EVT StoreTy;
  if (UseVector) {
    unsigned Elts = NumStores * NumMemElts;
    // Get the type for the merged vector store.
    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
  } else
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);

  SDValue StoredVal;
  if (UseVector) {
    if (IsConstantSrc) {
      // Gather the stored constants as elements of a single vector.
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0; I != NumStores; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // If constant is of the wrong type, convert it now.
        if (MemVT != Val.getValueType()) {
          Val = peekThroughBitcasts(Val);
          // Deal with constants of wrong size.
          if (ElementSizeBits != Val.getValueSizeInBits()) {
            EVT IntMemVT =
                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
            if (isa<ConstantFPSDNode>(Val)) {
              // Not clear how to truncate FP values.
              return false;
            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
              // First normalize to the value's own width, then resize to the
              // element width, so the constant is an IntMemVT-typed value.
              Val = DAG.getConstant(C->getAPIntValue()
                                        .zextOrTrunc(Val.getValueSizeInBits())
                                        .zextOrTrunc(ElementSizeBits),
                                    SDLoc(C), IntMemVT);
          }
          // Bitcast the correctly-sized value to the element type MemVT.
          Val = DAG.getBitcast(MemVT, Val);
        }
        BuildVector.push_back(Val);
      }
      // Scalars become a BUILD_VECTOR; vector elements are concatenated.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, BuildVector);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = peekThroughBitcasts(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
        // type MemVT. If the underlying value is not the correct
        // type, but it is an extraction of an appropriate vector we
        // can recast Val to be of the correct type. This may require
        // converting between EXTRACT_VECTOR_ELT and
        // EXTRACT_SUBVECTOR.
        if ((MemVT != Val.getValueType()) &&
            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
          EVT MemVTScalarTy = MemVT.getScalarType();
          // We may need to add a bitcast here to get types to line up.
          if (MemVTScalarTy != Val.getValueType().getScalarType()) {
            Val = DAG.getBitcast(MemVT, Val);
          } else {
            // Same scalar type: re-extract directly at type MemVT, switching
            // between element and subvector extraction as needed.
            unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
                                            : ISD::EXTRACT_VECTOR_ELT;
            SDValue Vec = Val.getOperand(0);
            SDValue Idx = Val.getOperand(1);
            Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
          }
        }
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      // On little-endian targets the last store in memory occupies the
      // high bits, so pack the stores in reverse order (each iteration
      // shifts previously packed values up).
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      Val = peekThroughBitcasts(Val);
      StoreInt <<= ElementSizeBits;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF()
                        .bitcastToAPInt()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary give up for now.
        if (MemVT.getSizeInBits() != ElementSizeBits)
          return false;
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  // Join the input chains of all merged stores into one TokenFactor.
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // make sure we use trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (!UseTrunc) {
    NewStore =
        DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                     FirstInChain->getPointerInfo(), FirstInChain->getAlign());
  } else { // Must be realized as a trunc store
    // Widen the constant to the promoted type, then store it back at the
    // original memory type via a truncating store.
    EVT LegalizedStoredValTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}
16271 
/// Populate StoreNodes with stores that are candidates for merging with St
/// (same base pointer, compatible value source, simple/non-indexed), and set
/// RootNode to the common chain ancestor the search was rooted at. Each
/// candidate is recorded together with its byte offset from St's base.
void DAGCombiner::getStoreMergeCandidates(
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SDNode *&RootNode) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer. We must have a base and an offset. Do not handle stores to undef
  // base pointers.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
  if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
    return;

  SDValue Val = peekThroughBitcasts(St->getValue());
  StoreSource StoreSrc = getStoreSource(Val);
  assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");

  // Match on loadbaseptr if relevant.
  EVT MemVT = St->getMemoryVT();
  BaseIndexOffset LBasePtr;
  EVT LoadVT;
  if (StoreSrc == StoreSource::Load) {
    auto *Ld = cast<LoadSDNode>(Val);
    LBasePtr = BaseIndexOffset::match(Ld, DAG);
    LoadVT = Ld->getMemoryVT();
    // Load and store should be the same type.
    if (MemVT != LoadVT)
      return;
    // Loads must only have one use.
    if (!Ld->hasNUsesOfValue(1, 0))
      return;
    // The memory operands must not be volatile/indexed/atomic.
    // TODO: May be able to relax for unordered atomics (see D66309)
    if (!Ld->isSimple() || Ld->isIndexed())
      return;
  }
  // Decide whether Other can be merged with St; on success, fill in Ptr and
  // the byte Offset of Other relative to St's base.
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    // The memory operands must not be volatile/indexed/atomic.
    // TODO: May be able to relax for unordered atomics (see D66309)
    if (!Other->isSimple() || Other->isIndexed())
      return false;
    // Don't mix temporal stores with non-temporal stores.
    if (St->isNonTemporal() != Other->isNonTemporal())
      return false;
    SDValue OtherBC = peekThroughBitcasts(Other->getValue());
    // Allow merging constants of different types as integers.
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                           : Other->getMemoryVT() != MemVT;
    switch (StoreSrc) {
    case StoreSource::Load: {
      if (NoTypeMatch)
        return false;
      // The Load's Base Ptr must also match.
      auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
      if (!OtherLd)
        return false;
      BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
      if (LoadVT != OtherLd->getMemoryVT())
        return false;
      // Loads must only have one use.
      if (!OtherLd->hasNUsesOfValue(1, 0))
        return false;
      // The memory operands must not be volatile/indexed/atomic.
      // TODO: May be able to relax for unordered atomics (see D66309)
      if (!OtherLd->isSimple() || OtherLd->isIndexed())
        return false;
      // Don't mix temporal loads with non-temporal loads.
      if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
        return false;
      if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
        return false;
      break;
    }
    case StoreSource::Constant:
      if (NoTypeMatch)
        return false;
      if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
        return false;
      break;
    case StoreSource::Extract:
      // Do not merge truncated stores here.
      if (Other->isTruncatingStore())
        return false;
      if (!MemVT.bitsEq(OtherBC.getValueType()))
        return false;
      if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
          OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
        return false;
      break;
    default:
      llvm_unreachable("Unhandled store source for merging");
    }
    Ptr = BaseIndexOffset::match(Other, DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };

  // Check if the pair of StoreNode and the RootNode already bail out many
  // times which is over the limit in dependence check.
  auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
                                        SDNode *RootNode) -> bool {
    auto RootCount = StoreRootCountMap.find(StoreNode);
    return RootCount != StoreRootCountMap.end() &&
           RootCount->second.first == RootNode &&
           RootCount->second.second > StoreMergeDependenceLimit;
  };

  // Append *UseIter to StoreNodes if it is a matching store reached through a
  // chain edge.
  auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
    // This must be a chain use.
    if (UseIter.getOperandNo() != 0)
      return;
    if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
      BaseIndexOffset Ptr;
      int64_t PtrDiff;
      if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
          !OverLimitInDependenceCheck(OtherStore, RootNode))
        StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
    }
  };

  // We are looking for a root node which is an ancestor to all mergable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load
  // which is always true for nonvolatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
  //
  // Root
  // |-------|-------|
  // Load    Load    Store3
  // |       |
  // Store1   Store2
  //
  // FIXME: We should be able to climb and
  // descend TokenFactors to find candidates as well.

  RootNode = St->getChain().getNode();

  // Bound the search so pathological chains cannot blow up compile time.
  unsigned NumNodesExplored = 0;
  const unsigned MaxSearchNodes = 1024;
  if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
    // St is chained through a load: root at the load's chain and descend one
    // level through every sibling load to reach their dependent stores.
    RootNode = Ldn->getChain().getNode();
    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
         I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
          TryToAddCandidate(I2);
      }
    }
  } else {
    // Otherwise every chain user of the root is a direct candidate.
    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
         I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
      TryToAddCandidate(I);
  }
}
16424 
16425 // We need to check that merging these stores does not cause a loop in
16426 // the DAG. Any store candidate may depend on another candidate
16427 // indirectly through its operand (we already consider dependencies
16428 // through the chain). Check in parallel by searching up from
16429 // non-chain operands of candidates.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
    SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs the originating
  // stores from which worklist nodes come from in a similar way to
  // TokenFactor simplification.

  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 8> Worklist;

  // RootNode is a predecessor to all candidates so we need not search
  // past it. Add RootNode (peeking through TokenFactors). Do not count
  // these towards size check.

  // Seed Visited with RootNode and the transitive TokenFactor operands above
  // it; hasPredecessorHelper will stop when it reaches any of these.
  Worklist.push_back(RootNode);
  while (!Worklist.empty()) {
    auto N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue; // Already present in Visited.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (SDValue Op : N->ops())
        Worklist.push_back(Op.getNode());
    }
  }

  // Don't count pruning nodes towards max.
  unsigned int Max = 1024 + Visited.size();
  // Search Ops of store candidates.
  for (unsigned i = 0; i < NumStores; ++i) {
    SDNode *N = StoreNodes[i].MemNode;
    // Of the 4 Store Operands:
    //   * Chain (Op 0) -> We have already considered these
    //                    in candidate selection and can be
    //                    safely ignored
    //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
    //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                       but aren't necessarily from the same base node, so
    //                       cycles possible (e.g. via indexed store).
    //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
    //               non-indexed stores). Not constant on all targets (e.g. ARM)
    //               and so can participate in a cycle.
    for (unsigned j = 1; j < N->getNumOperands(); ++j)
      Worklist.push_back(N->getOperand(j).getNode());
  }
  // Search through DAG. We can stop early if we find a store node.
  // If any candidate is reachable from another candidate's non-chain
  // operands, merging would create a cycle, so report failure.
  for (unsigned i = 0; i < NumStores; ++i)
    if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
                                     Max)) {
      // If the searching bail out, record the StoreNode and RootNode in the
      // StoreRootCountMap. If we have seen the pair many times over a limit,
      // we won't add the StoreNode into StoreNodes set again.
      if (Visited.size() >= Max) {
        auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
        if (RootCount.first == RootNode)
          RootCount.second++;
        else
          RootCount = {RootNode, 1};
      }
      return false;
    }
  return true;
}
16493 
16494 unsigned
16495 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
16496                                   int64_t ElementSizeBytes) const {
16497   while (true) {
16498     // Find a store past the width of the first store.
16499     size_t StartIdx = 0;
16500     while ((StartIdx + 1 < StoreNodes.size()) &&
16501            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
16502               StoreNodes[StartIdx + 1].OffsetFromBase)
16503       ++StartIdx;
16504 
16505     // Bail if we don't have enough candidates to merge.
16506     if (StartIdx + 1 >= StoreNodes.size())
16507       return 0;
16508 
16509     // Trim stores that overlapped with the first store.
16510     if (StartIdx)
16511       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
16512 
16513     // Scan the memory operations on the chain and find the first
16514     // non-consecutive store memory address.
16515     unsigned NumConsecutiveStores = 1;
16516     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
16517     // Check that the addresses are consecutive starting from the second
16518     // element in the list of stores.
16519     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
16520       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
16521       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
16522         break;
16523       NumConsecutiveStores = i + 1;
16524     }
16525     if (NumConsecutiveStores > 1)
16526       return NumConsecutiveStores;
16527 
16528     // There are no consecutive stores at the start of the list.
16529     // Remove the first store and try again.
16530     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
16531   }
16532 }
16533 
/// Try to merge runs of consecutive constant stores from StoreNodes into
/// wider integer, truncating-integer, or vector stores. Returns true if any
/// merge was performed. Consumes entries from the front of StoreNodes as runs
/// are merged or proven unmergeable.
bool DAGCombiner::tryStoreMergeOfConstants(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
    EVT MemVT, SDNode *RootNode, bool AllowVectors) {
  LLVMContext &Context = *DAG.getContext();
  const DataLayout &DL = DAG.getDataLayout();
  int64_t ElementSizeBytes = MemVT.getStoreSize();
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
  bool MadeChange = false;

  // Store the constants into memory as one consecutive store.
  while (NumConsecutiveStores >= 2) {
    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
    unsigned FirstStoreAlign = FirstInChain->getAlignment();
    // Longest legal prefix as a single wide integer store / vector store.
    unsigned LastLegalType = 1;
    unsigned LastLegalVectorType = 1;
    // Whether the integer merge must be emitted as a truncating store.
    bool LastIntegerTrunc = false;
    bool NonZero = false;
    // Index of the first zero element that follows a non-zero one; used to
    // bound how many candidates can be skipped when no merge is found.
    unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
    for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
      StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
      SDValue StoredVal = ST->getValue();
      bool IsElementZero = false;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
        IsElementZero = C->isNullValue();
      else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
        IsElementZero = C->getConstantFPValue()->isNullValue();
      if (IsElementZero) {
        if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
          FirstZeroAfterNonZero = i;
      }
      NonZero |= !IsElementZero;

      // Find a legal type for the constant store.
      unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
      EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
      bool IsFast = false;

      // Break early when size is too large to be legal.
      if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
        break;

      if (TLI.isTypeLegal(StoreTy) &&
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                 *FirstInChain->getMemOperand(), &IsFast) &&
          IsFast) {
        LastIntegerTrunc = false;
        LastLegalType = i + 1;
        // Or check whether a truncstore is legal.
      } else if (TLI.getTypeAction(Context, StoreTy) ==
                 TargetLowering::TypePromoteInteger) {
        EVT LegalizedStoredValTy =
            TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
        if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                   *FirstInChain->getMemOperand(), &IsFast) &&
            IsFast) {
          LastIntegerTrunc = true;
          LastLegalType = i + 1;
        }
      }

      // We only use vectors if the constant is known to be zero or the
      // target allows it and the function is not marked with the
      // noimplicitfloat attribute.
      if ((!NonZero ||
           TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
          AllowVectors) {
        // Find a legal type for the vector store.
        unsigned Elts = (i + 1) * NumMemElts;
        EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
        if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
            TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
            TLI.allowsMemoryAccess(Context, DL, Ty,
                                   *FirstInChain->getMemOperand(), &IsFast) &&
            IsFast)
          LastLegalVectorType = i + 1;
      }
    }

    // Prefer the vector form when it covers more stores.
    bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
    unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;

    // Check if we found a legal integer type that creates a meaningful
    // merge.
    if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have, is if the alignment has
      // improved or we've dropped a non-zero value. Drop as many
      // candidates as we can here.
      unsigned NumSkip = 1;
      while ((NumSkip < NumConsecutiveStores) &&
             (NumSkip < FirstZeroAfterNonZero) &&
             (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
        NumSkip++;

      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
      NumConsecutiveStores -= NumSkip;
      continue;
    }

    // Check that we can merge these candidates without causing a cycle.
    if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
                                                  RootNode)) {
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
      NumConsecutiveStores -= NumElem;
      continue;
    }

    MadeChange |= mergeStoresOfConstantsOrVecElts(
        StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);

    // Remove merged stores for next iteration.
    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
    NumConsecutiveStores -= NumElem;
  }
  return MadeChange;
}
16657 
// Attempt to merge a run of consecutive stores whose stored values come from
// vector extracts (dispatched here for StoreSource::Extract by
// mergeConsecutiveStores) into a single wide vector store.
//
// \param StoreNodes  Candidate stores, sorted by offset from the common base;
//                    merged or rejected candidates are erased from the front.
// \param NumConsecutiveStores  Number of leading entries in StoreNodes that
//                    are consecutive in memory.
// \param MemVT       Memory type of each individual store.
// \param RootNode    Chain root used for the dependency/cycle check.
// \returns true if at least one merged store was created.
bool DAGCombiner::tryStoreMergeOfExtracts(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
    EVT MemVT, SDNode *RootNode) {
  LLVMContext &Context = *DAG.getContext();
  const DataLayout &DL = DAG.getDataLayout();
  // Each merged element may itself be a vector; count scalar elements.
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
  bool MadeChange = false;

  // Loop on Consecutive Stores on success.
  while (NumConsecutiveStores >= 2) {
    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
    unsigned FirstStoreAlign = FirstInChain->getAlignment();
    unsigned NumStoresToMerge = 1;
    // Greedily find the largest prefix of stores for which a legal, fast
    // merged vector type exists. Legality is checked against the first
    // store's memory operand (address space, alignment, flags).
    for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
      // Find a legal type for the vector store.
      unsigned Elts = (i + 1) * NumMemElts;
      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
      bool IsFast = false;

      // Break early when size is too large to be legal.
      if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
        break;

      if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
          TLI.allowsMemoryAccess(Context, DL, Ty,
                                 *FirstInChain->getMemOperand(), &IsFast) &&
          IsFast)
        NumStoresToMerge = i + 1;
    }

    // Check if we found a legal integer type creating a meaningful
    // merge.
    if (NumStoresToMerge < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have, is if the alignment has
      // improved. Drop as many candidates as we can here.
      unsigned NumSkip = 1;
      while ((NumSkip < NumConsecutiveStores) &&
             (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
        NumSkip++;

      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
      NumConsecutiveStores -= NumSkip;
      continue;
    }

    // Check that we can merge these candidates without causing a cycle.
    if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
                                                  RootNode)) {
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + NumStoresToMerge);
      NumConsecutiveStores -= NumStoresToMerge;
      continue;
    }

    // Build the merged store (IsConstantSrc=false, UseVector=true,
    // UseTrunc=false).
    MadeChange |= mergeStoresOfConstantsOrVecElts(
        StoreNodes, MemVT, NumStoresToMerge, false, true, false);

    // Remove the merged stores and retry with the remaining candidates.
    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
    NumConsecutiveStores -= NumStoresToMerge;
  }
  return MadeChange;
}
16725 
// Attempt to merge consecutive stores whose stored values are loads
// (dispatched here for StoreSource::Load by mergeConsecutiveStores) into one
// wide load feeding one wide store. Tries vector types first, then plain
// integer types, then a truncstore/extload combination when the integer type
// would be promoted.
//
// \param StoreNodes  Candidate stores, sorted by offset from the common base;
//                    processed candidates are erased from the front.
// \param NumConsecutiveStores  Number of leading entries in StoreNodes that
//                    are consecutive in memory.
// \param MemVT       Memory type of each individual store.
// \param RootNode    Chain root used for the dependency/cycle check.
// \param AllowVectors  False when the function is marked noimplicitfloat.
// \param IsNonTemporalStore/IsNonTemporalLoad  Propagate the nontemporal
//                    flag from the originals onto the merged operations.
// \returns true if at least one merged load/store pair was created.
bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
                                       unsigned NumConsecutiveStores, EVT MemVT,
                                       SDNode *RootNode, bool AllowVectors,
                                       bool IsNonTemporalStore,
                                       bool IsNonTemporalLoad) {
  LLVMContext &Context = *DAG.getContext();
  const DataLayout &DL = DAG.getDataLayout();
  int64_t ElementSizeBytes = MemVT.getStoreSize();
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
  bool MadeChange = false;

  // NOTE(review): this initial value is never read; StartAddress is
  // re-initialized from LoadNodes[0] below before its first use.
  int64_t StartAddress = StoreNodes[0].OffsetFromBase;

  // Look for load nodes which are used by the stored values.
  SmallVector<MemOpLink, 8> LoadNodes;

  // Find acceptable loads. Loads need to have the same chain (token factor),
  // must not be zext, volatile, indexed, and they must be consecutive.
  BaseIndexOffset LdBasePtr;

  for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    SDValue Val = peekThroughBitcasts(St->getValue());
    // Stored value is known to be a (possibly bitcast) load for
    // StoreSource::Load candidates, hence the unchecked cast.
    LoadSDNode *Ld = cast<LoadSDNode>(Val);

    BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
    // If this is not the first ptr that we check.
    int64_t LdOffset = 0;
    if (LdBasePtr.getBase().getNode()) {
      // The base ptr must be the same.
      if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
        break;
    } else {
      // Check that all other base pointers are the same as this one.
      LdBasePtr = LdPtr;
    }

    // We found a potential memory operand to merge.
    LoadNodes.push_back(MemOpLink(Ld, LdOffset));
  }

  while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
    Align RequiredAlignment;
    bool NeedRotate = false;
    if (LoadNodes.size() == 2) {
      // If we have load/store pair instructions and we only have two values,
      // don't bother merging.
      if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
          StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
        break;
      }
      // If the loads are reversed, see if we can rotate the halves into place.
      int64_t Offset0 = LoadNodes[0].OffsetFromBase;
      int64_t Offset1 = LoadNodes[1].OffsetFromBase;
      EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
      if (Offset0 - Offset1 == ElementSizeBytes &&
          (hasOperation(ISD::ROTL, PairVT) ||
           hasOperation(ISD::ROTR, PairVT))) {
        std::swap(LoadNodes[0], LoadNodes[1]);
        NeedRotate = true;
      }
    }
    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
    Align FirstStoreAlign = FirstInChain->getAlign();
    LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);

    // Scan the memory operations on the chain and find the first
    // non-consecutive load memory address. These variables hold the index in
    // the store node array.

    unsigned LastConsecutiveLoad = 1;

    // This variable refers to the size and not index in the array.
    unsigned LastLegalVectorType = 1;
    unsigned LastLegalIntegerType = 1;
    bool isDereferenceable = true;
    bool DoIntegerTruncate = false;
    StartAddress = LoadNodes[0].OffsetFromBase;
    SDValue LoadChain = FirstLoad->getChain();
    for (unsigned i = 1; i < LoadNodes.size(); ++i) {
      // All loads must share the same chain.
      if (LoadNodes[i].MemNode->getChain() != LoadChain)
        break;

      int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
        break;
      LastConsecutiveLoad = i;

      // The merged load is dereferenceable only if every source load is.
      if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
        isDereferenceable = false;

      // Find a legal type for the vector store.
      unsigned Elts = (i + 1) * NumMemElts;
      EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);

      // Break early when size is too large to be legal.
      if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
        break;

      // Both the merged store and the merged load must be legal and fast.
      bool IsFastSt = false;
      bool IsFastLd = false;
      if (TLI.isTypeLegal(StoreTy) &&
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                 *FirstInChain->getMemOperand(), &IsFastSt) &&
          IsFastSt &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                 *FirstLoad->getMemOperand(), &IsFastLd) &&
          IsFastLd) {
        LastLegalVectorType = i + 1;
      }

      // Find a legal type for the integer store.
      unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
      StoreTy = EVT::getIntegerVT(Context, SizeInBits);
      if (TLI.isTypeLegal(StoreTy) &&
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                 *FirstInChain->getMemOperand(), &IsFastSt) &&
          IsFastSt &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                 *FirstLoad->getMemOperand(), &IsFastLd) &&
          IsFastLd) {
        LastLegalIntegerType = i + 1;
        DoIntegerTruncate = false;
        // Or check whether a truncstore and extload is legal.
      } else if (TLI.getTypeAction(Context, StoreTy) ==
                 TargetLowering::TypePromoteInteger) {
        EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
        // All three extension kinds must be legal since the extload below is
        // built with ISD::EXTLOAD and may later be refined by other combines.
        if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
            TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
            TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
            TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                   *FirstInChain->getMemOperand(), &IsFastSt) &&
            IsFastSt &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                   *FirstLoad->getMemOperand(), &IsFastLd) &&
            IsFastLd) {
          LastLegalIntegerType = i + 1;
          DoIntegerTruncate = true;
        }
      }
    }

    // Only use vector types if the vector type is larger than the integer
    // type. If they are the same, use integers.
    bool UseVectorTy =
        LastLegalVectorType > LastLegalIntegerType && AllowVectors;
    unsigned LastLegalType =
        std::max(LastLegalVectorType, LastLegalIntegerType);

    // We add +1 here because the LastXXX variables refer to location while
    // the NumElem refers to array/index size.
    unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
    NumElem = std::min(LastLegalType, NumElem);
    Align FirstLoadAlign = FirstLoad->getAlign();

    if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have is if the alignment or either
      // the load or store has improved. Drop as many candidates as we
      // can here.
      unsigned NumSkip = 1;
      while ((NumSkip < LoadNodes.size()) &&
             (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
             (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
        NumSkip++;
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
      LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
      NumConsecutiveStores -= NumSkip;
      continue;
    }

    // Check that we can merge these candidates without causing a cycle.
    if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
                                                  RootNode)) {
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
      LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
      NumConsecutiveStores -= NumElem;
      continue;
    }

    // Find if it is better to use vectors or integers to load and store
    // to memory.
    EVT JointMemOpVT;
    if (UseVectorTy) {
      // Find a legal type for the vector store.
      unsigned Elts = NumElem * NumMemElts;
      JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
    } else {
      unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
      JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
    }

    SDLoc LoadDL(LoadNodes[0].MemNode);
    SDLoc StoreDL(StoreNodes[0].MemNode);

    // The merged loads are required to have the same incoming chain, so
    // using the first's chain is acceptable.

    SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
    AddToWorklist(NewStoreChain.getNode());

    // Preserve dereferenceability and nontemporal hints on the merged ops.
    MachineMemOperand::Flags LdMMOFlags =
        isDereferenceable ? MachineMemOperand::MODereferenceable
                          : MachineMemOperand::MONone;
    if (IsNonTemporalLoad)
      LdMMOFlags |= MachineMemOperand::MONonTemporal;

    MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
                                              ? MachineMemOperand::MONonTemporal
                                              : MachineMemOperand::MONone;

    SDValue NewLoad, NewStore;
    if (UseVectorTy || !DoIntegerTruncate) {
      NewLoad = DAG.getLoad(
          JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
          FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
      SDValue StoreOp = NewLoad;
      if (NeedRotate) {
        // The two loads were swapped earlier; a half-width rotate restores
        // the original value order before storing.
        unsigned LoadWidth = ElementSizeBytes * 8 * 2;
        assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
               "Unexpected type for rotate-able load pair");
        SDValue RotAmt =
            DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
        // Target can convert to the identical ROTR if it does not have ROTL.
        StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
      }
      NewStore = DAG.getStore(
          NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
          FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
    } else { // This must be the truncstore/extload case
      EVT ExtendedTy =
          TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
      NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
                               FirstLoad->getChain(), FirstLoad->getBasePtr(),
                               FirstLoad->getPointerInfo(), JointMemOpVT,
                               FirstLoadAlign, LdMMOFlags);
      NewStore = DAG.getTruncStore(
          NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
          FirstInChain->getPointerInfo(), JointMemOpVT,
          FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
    }

    // Transfer chain users from old loads to the new load.
    for (unsigned i = 0; i < NumElem; ++i) {
      LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
                                    SDValue(NewLoad.getNode(), 1));
    }

    // Replace all stores with the new store. Recursively remove corresponding
    // values if they are no longer used.
    for (unsigned i = 0; i < NumElem; ++i) {
      SDValue Val = StoreNodes[i].MemNode->getOperand(1);
      CombineTo(StoreNodes[i].MemNode, NewStore);
      if (Val.getNode()->use_empty())
        recursivelyDeleteUnusedNodes(Val.getNode());
    }

    MadeChange = true;
    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
    LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
    NumConsecutiveStores -= NumElem;
  }
  return MadeChange;
}
17002 
// Driver for the store-merging combine: collect candidate stores related to
// \p St, sort them by address, and repeatedly hand runs of consecutive stores
// to the source-specific merge routine (constants, vector extracts, or
// loads). Returns true if any merge succeeded.
bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
  if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
    return false;

  // TODO: Extend this function to merge stores of scalable vectors.
  // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
  // store since we know <vscale x 16 x i8> is exactly twice as large as
  // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
  EVT MemVT = St->getMemoryVT();
  if (MemVT.isScalableVector())
    return false;
  // Merging doubles the width at minimum; bail if even that is too wide.
  if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
    return false;

  // This function cannot currently deal with non-byte-sized memory sizes.
  int64_t ElementSizeBytes = MemVT.getStoreSize();
  if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
    return false;

  // Do not bother looking at stored values that are not constants, loads, or
  // extracted vector elements.
  SDValue StoredVal = peekThroughBitcasts(St->getValue());
  const StoreSource StoreSrc = getStoreSource(StoredVal);
  if (StoreSrc == StoreSource::Unknown)
    return false;

  SmallVector<MemOpLink, 8> StoreNodes;
  SDNode *RootNode;
  // Find potential store merge candidates by searching through chain sub-DAG
  getStoreMergeCandidates(St, StoreNodes, RootNode);

  // Check if there is anything to merge.
  if (StoreNodes.size() < 2)
    return false;

  // Sort the memory operands according to their distance from the
  // base pointer.
  llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
    return LHS.OffsetFromBase < RHS.OffsetFromBase;
  });

  // Vector merges are disallowed under noimplicitfloat.
  bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
      Attribute::NoImplicitFloat);
  bool IsNonTemporalStore = St->isNonTemporal();
  bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
                           cast<LoadSDNode>(StoredVal)->isNonTemporal();

  // Store Merge attempts to merge the lowest stores. This generally
  // works out as if successful, as the remaining stores are checked
  // after the first collection of stores is merged. However, in the
  // case that a non-mergeable store is found first, e.g., {p[-2],
  // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
  // mergeable cases. To prevent this, we prune such stores from the
  // front of StoreNodes here.
  bool MadeChange = false;
  while (StoreNodes.size() > 1) {
    // The merge routines below shrink StoreNodes, so this re-scan makes
    // progress toward termination on every iteration.
    unsigned NumConsecutiveStores =
        getConsecutiveStores(StoreNodes, ElementSizeBytes);
    // There are no more stores in the list to examine.
    if (NumConsecutiveStores == 0)
      return MadeChange;

    // We have at least 2 consecutive stores. Try to merge them.
    assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
    switch (StoreSrc) {
    case StoreSource::Constant:
      MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
                                             MemVT, RootNode, AllowVectors);
      break;

    case StoreSource::Extract:
      MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
                                            MemVT, RootNode);
      break;

    case StoreSource::Load:
      MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
                                         MemVT, RootNode, AllowVectors,
                                         IsNonTemporalStore, IsNonTemporalLoad);
      break;

    default:
      llvm_unreachable("Unhandled store source type");
    }
  }
  return MadeChange;
}
17090 
17091 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
17092   SDLoc SL(ST);
17093   SDValue ReplStore;
17094 
17095   // Replace the chain to avoid dependency.
17096   if (ST->isTruncatingStore()) {
17097     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
17098                                   ST->getBasePtr(), ST->getMemoryVT(),
17099                                   ST->getMemOperand());
17100   } else {
17101     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
17102                              ST->getMemOperand());
17103   }
17104 
17105   // Create token to keep both nodes around.
17106   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
17107                               MVT::Other, ST->getChain(), ReplStore);
17108 
17109   // Make sure the new and old chains are cleaned up.
17110   AddToWorklist(Token.getNode());
17111 
17112   // Don't add users to work list.
17113   return CombineTo(ST, Token, false);
17114 }
17115 
17116 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
17117   SDValue Value = ST->getValue();
17118   if (Value.getOpcode() == ISD::TargetConstantFP)
17119     return SDValue();
17120 
17121   if (!ISD::isNormalStore(ST))
17122     return SDValue();
17123 
17124   SDLoc DL(ST);
17125 
17126   SDValue Chain = ST->getChain();
17127   SDValue Ptr = ST->getBasePtr();
17128 
17129   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
17130 
17131   // NOTE: If the original store is volatile, this transform must not increase
17132   // the number of stores.  For example, on x86-32 an f64 can be stored in one
17133   // processor operation but an i64 (which is not legal) requires two.  So the
17134   // transform should not be done in this case.
17135 
17136   SDValue Tmp;
17137   switch (CFP->getSimpleValueType(0).SimpleTy) {
17138   default:
17139     llvm_unreachable("Unknown FP type");
17140   case MVT::f16:    // We don't do this for these yet.
17141   case MVT::f80:
17142   case MVT::f128:
17143   case MVT::ppcf128:
17144     return SDValue();
17145   case MVT::f32:
17146     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
17147         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
17148       ;
17149       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
17150                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
17151                             MVT::i32);
17152       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
17153     }
17154 
17155     return SDValue();
17156   case MVT::f64:
17157     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
17158          ST->isSimple()) ||
17159         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
17160       ;
17161       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
17162                             getZExtValue(), SDLoc(CFP), MVT::i64);
17163       return DAG.getStore(Chain, DL, Tmp,
17164                           Ptr, ST->getMemOperand());
17165     }
17166 
17167     if (ST->isSimple() &&
17168         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
17169       // Many FP stores are not made apparent until after legalize, e.g. for
17170       // argument passing.  Since this is so common, custom legalize the
17171       // 64-bit integer store into two 32-bit stores.
17172       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
17173       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
17174       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
17175       if (DAG.getDataLayout().isBigEndian())
17176         std::swap(Lo, Hi);
17177 
17178       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
17179       AAMDNodes AAInfo = ST->getAAInfo();
17180 
17181       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
17182                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
17183       Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
17184       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
17185                                  ST->getPointerInfo().getWithOffset(4),
17186                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
17187       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
17188                          St0, St1);
17189     }
17190 
17191     return SDValue();
17192   }
17193 }
17194 
17195 SDValue DAGCombiner::visitSTORE(SDNode *N) {
17196   StoreSDNode *ST  = cast<StoreSDNode>(N);
17197   SDValue Chain = ST->getChain();
17198   SDValue Value = ST->getValue();
17199   SDValue Ptr   = ST->getBasePtr();
17200 
17201   // If this is a store of a bit convert, store the input value if the
17202   // resultant store does not need a higher alignment than the original.
17203   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
17204       ST->isUnindexed()) {
17205     EVT SVT = Value.getOperand(0).getValueType();
17206     // If the store is volatile, we only want to change the store type if the
17207     // resulting store is legal. Otherwise we might increase the number of
17208     // memory accesses. We don't care if the original type was legal or not
17209     // as we assume software couldn't rely on the number of accesses of an
17210     // illegal type.
17211     // TODO: May be able to relax for unordered atomics (see D66309)
17212     if (((!LegalOperations && ST->isSimple()) ||
17213          TLI.isOperationLegal(ISD::STORE, SVT)) &&
17214         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
17215                                      DAG, *ST->getMemOperand())) {
17216       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
17217                           ST->getMemOperand());
17218     }
17219   }
17220 
17221   // Turn 'store undef, Ptr' -> nothing.
17222   if (Value.isUndef() && ST->isUnindexed())
17223     return Chain;
17224 
17225   // Try to infer better alignment information than the store already has.
17226   if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
17227     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
17228       if (*Alignment > ST->getAlign() &&
17229           isAligned(*Alignment, ST->getSrcValueOffset())) {
17230         SDValue NewStore =
17231             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
17232                               ST->getMemoryVT(), *Alignment,
17233                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
17234         // NewStore will always be N as we are only refining the alignment
17235         assert(NewStore.getNode() == N);
17236         (void)NewStore;
17237       }
17238     }
17239   }
17240 
17241   // Try transforming a pair floating point load / store ops to integer
17242   // load / store ops.
17243   if (SDValue NewST = TransformFPLoadStorePair(N))
17244     return NewST;
17245 
17246   // Try transforming several stores into STORE (BSWAP).
17247   if (SDValue Store = mergeTruncStores(ST))
17248     return Store;
17249 
17250   if (ST->isUnindexed()) {
17251     // Walk up chain skipping non-aliasing memory nodes, on this store and any
17252     // adjacent stores.
17253     if (findBetterNeighborChains(ST)) {
17254       // replaceStoreChain uses CombineTo, which handled all of the worklist
17255       // manipulation. Return the original node to not do anything else.
17256       return SDValue(ST, 0);
17257     }
17258     Chain = ST->getChain();
17259   }
17260 
17261   // FIXME: is there such a thing as a truncating indexed store?
17262   if (ST->isTruncatingStore() && ST->isUnindexed() &&
17263       Value.getValueType().isInteger() &&
17264       (!isa<ConstantSDNode>(Value) ||
17265        !cast<ConstantSDNode>(Value)->isOpaque())) {
17266     APInt TruncDemandedBits =
17267         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
17268                              ST->getMemoryVT().getScalarSizeInBits());
17269 
17270     // See if we can simplify the input to this truncstore with knowledge that
17271     // only the low bits are being used.  For example:
17272     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
17273     AddToWorklist(Value.getNode());
17274     if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
17275       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
17276                                ST->getMemOperand());
17277 
17278     // Otherwise, see if we can simplify the operation with
17279     // SimplifyDemandedBits, which only works if the value has a single use.
17280     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
17281       // Re-visit the store if anything changed and the store hasn't been merged
17282       // with another node (N is deleted) SimplifyDemandedBits will add Value's
17283       // node back to the worklist if necessary, but we also need to re-visit
17284       // the Store node itself.
17285       if (N->getOpcode() != ISD::DELETED_NODE)
17286         AddToWorklist(N);
17287       return SDValue(N, 0);
17288     }
17289   }
17290 
17291   // If this is a load followed by a store to the same location, then the store
17292   // is dead/noop.
17293   // TODO: Can relax for unordered atomics (see D66309)
17294   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
17295     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
17296         ST->isUnindexed() && ST->isSimple() &&
17297         // There can't be any side effects between the load and store, such as
17298         // a call or store.
17299         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
17300       // The store is dead, remove it.
17301       return Chain;
17302     }
17303   }
17304 
17305   // TODO: Can relax for unordered atomics (see D66309)
17306   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
17307     if (ST->isUnindexed() && ST->isSimple() &&
17308         ST1->isUnindexed() && ST1->isSimple()) {
17309       if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
17310           ST->getMemoryVT() == ST1->getMemoryVT()) {
17311         // If this is a store followed by a store with the same value to the
17312         // same location, then the store is dead/noop.
17313         return Chain;
17314       }
17315 
17316       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
17317           !ST1->getBasePtr().isUndef() &&
17318           // BaseIndexOffset and the code below requires knowing the size
17319           // of a vector, so bail out if MemoryVT is scalable.
17320           !ST1->getMemoryVT().isScalableVector()) {
17321         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
17322         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
17323         unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
17324         unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
17325         // If this is a store who's preceding store to a subset of the current
17326         // location and no one other node is chained to that store we can
17327         // effectively drop the store. Do not remove stores to undef as they may
17328         // be used as data sinks.
17329         if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
17330           CombineTo(ST1, ST1->getChain());
17331           return SDValue();
17332         }
17333       }
17334     }
17335   }
17336 
17337   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
17338   // truncating store.  We can do this even if this is already a truncstore.
17339   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
17340       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
17341       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
17342                             ST->getMemoryVT())) {
17343     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
17344                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
17345   }
17346 
17347   // Always perform this optimization before types are legal. If the target
17348   // prefers, also try this after legalization to catch stores that were created
17349   // by intrinsics or other nodes.
17350   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
17351     while (true) {
17352       // There can be multiple store sequences on the same chain.
17353       // Keep trying to merge store sequences until we are unable to do so
17354       // or until we merge the last store on the chain.
17355       bool Changed = mergeConsecutiveStores(ST);
17356       if (!Changed) break;
17357       // Return N as merge only uses CombineTo and no worklist clean
17358       // up is necessary.
17359       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
17360         return SDValue(N, 0);
17361     }
17362   }
17363 
17364   // Try transforming N to an indexed store.
17365   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
17366     return SDValue(N, 0);
17367 
17368   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
17369   //
17370   // Make sure to do this only after attempting to merge stores in order to
17371   //  avoid changing the types of some subset of stores due to visit order,
17372   //  preventing their merging.
17373   if (isa<ConstantFPSDNode>(ST->getValue())) {
17374     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
17375       return NewSt;
17376   }
17377 
17378   if (SDValue NewSt = splitMergedValStore(ST))
17379     return NewSt;
17380 
17381   return ReduceLoadOpStoreWidth(N);
17382 }
17383 
17384 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
17385   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
17386   if (!LifetimeEnd->hasOffset())
17387     return SDValue();
17388 
17389   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
17390                                         LifetimeEnd->getOffset(), false);
17391 
17392   // We walk up the chains to find stores.
17393   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
17394   while (!Chains.empty()) {
17395     SDValue Chain = Chains.back();
17396     Chains.pop_back();
17397     if (!Chain.hasOneUse())
17398       continue;
17399     switch (Chain.getOpcode()) {
17400     case ISD::TokenFactor:
17401       for (unsigned Nops = Chain.getNumOperands(); Nops;)
17402         Chains.push_back(Chain.getOperand(--Nops));
17403       break;
17404     case ISD::LIFETIME_START:
17405     case ISD::LIFETIME_END:
17406       // We can forward past any lifetime start/end that can be proven not to
17407       // alias the node.
17408       if (!isAlias(Chain.getNode(), N))
17409         Chains.push_back(Chain.getOperand(0));
17410       break;
17411     case ISD::STORE: {
17412       StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
17413       // TODO: Can relax for unordered atomics (see D66309)
17414       if (!ST->isSimple() || ST->isIndexed())
17415         continue;
17416       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
17417       // If we store purely within object bounds just before its lifetime ends,
17418       // we can remove the store.
17419       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
17420                                    ST->getMemoryVT().getStoreSizeInBits())) {
17421         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
17422                    dbgs() << "\nwithin LIFETIME_END of : ";
17423                    LifetimeEndBase.dump(); dbgs() << "\n");
17424         CombineTo(ST, ST->getChain());
17425         return SDValue(N, 0);
17426       }
17427     }
17428     }
17429   }
17430   return SDValue();
17431 }
17432 
17433 /// For the instruction sequence of store below, F and I values
17434 /// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
17436 /// which can remove the bitwise instructions or sink them to colder places.
17437 ///
17438 ///   (store (or (zext (bitcast F to i32) to i64),
17439 ///              (shl (zext I to i64), 32)), addr)  -->
17440 ///   (store F, addr) and (store I, addr+4)
17441 ///
17442 /// Similarly, splitting for other merged store can also be beneficial, like:
17443 /// For pair of {i32, i32}, i64 store --> two i32 stores.
17444 /// For pair of {i32, i16}, i64 store --> two i32 stores.
17445 /// For pair of {i16, i16}, i32 store --> two i16 stores.
17446 /// For pair of {i16, i8},  i32 store --> two i16 stores.
17447 /// For pair of {i8, i8},   i16 store --> two i8 stores.
17448 ///
17449 /// We allow each target to determine specifically which kind of splitting is
17450 /// supported.
17451 ///
17452 /// The store patterns are commonly seen from the simple code snippet below
17453 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
17454 ///   void goo(const std::pair<int, float> &);
17455 ///   hoo() {
17456 ///     ...
17457 ///     goo(std::make_pair(tmp, ftmp));
17458 ///     ...
17459 ///   }
17460 ///
17461 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
17462   if (OptLevel == CodeGenOpt::None)
17463     return SDValue();
17464 
17465   // Can't change the number of memory accesses for a volatile store or break
17466   // atomicity for an atomic one.
17467   if (!ST->isSimple())
17468     return SDValue();
17469 
17470   SDValue Val = ST->getValue();
17471   SDLoc DL(ST);
17472 
17473   // Match OR operand.
17474   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
17475     return SDValue();
17476 
17477   // Match SHL operand and get Lower and Higher parts of Val.
17478   SDValue Op1 = Val.getOperand(0);
17479   SDValue Op2 = Val.getOperand(1);
17480   SDValue Lo, Hi;
17481   if (Op1.getOpcode() != ISD::SHL) {
17482     std::swap(Op1, Op2);
17483     if (Op1.getOpcode() != ISD::SHL)
17484       return SDValue();
17485   }
17486   Lo = Op2;
17487   Hi = Op1.getOperand(0);
17488   if (!Op1.hasOneUse())
17489     return SDValue();
17490 
17491   // Match shift amount to HalfValBitSize.
17492   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
17493   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
17494   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
17495     return SDValue();
17496 
17497   // Lo and Hi are zero-extended from int with size less equal than 32
17498   // to i64.
17499   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
17500       !Lo.getOperand(0).getValueType().isScalarInteger() ||
17501       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
17502       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
17503       !Hi.getOperand(0).getValueType().isScalarInteger() ||
17504       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
17505     return SDValue();
17506 
17507   // Use the EVT of low and high parts before bitcast as the input
17508   // of target query.
17509   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
17510                   ? Lo.getOperand(0).getValueType()
17511                   : Lo.getValueType();
17512   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
17513                    ? Hi.getOperand(0).getValueType()
17514                    : Hi.getValueType();
17515   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
17516     return SDValue();
17517 
17518   // Start to split store.
17519   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
17520   AAMDNodes AAInfo = ST->getAAInfo();
17521 
17522   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
17523   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
17524   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
17525   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
17526 
17527   SDValue Chain = ST->getChain();
17528   SDValue Ptr = ST->getBasePtr();
17529   // Lower value store.
17530   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
17531                              ST->getOriginalAlign(), MMOFlags, AAInfo);
17532   Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
17533   // Higher value store.
17534   SDValue St1 = DAG.getStore(
17535       St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
17536       ST->getOriginalAlign(), MMOFlags, AAInfo);
17537   return St1;
17538 }
17539 
17540 /// Convert a disguised subvector insertion into a shuffle:
17541 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
17542   assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
17543          "Expected extract_vector_elt");
17544   SDValue InsertVal = N->getOperand(1);
17545   SDValue Vec = N->getOperand(0);
17546 
17547   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
17548   // InsIndex)
17549   //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
17550   //   CONCAT_VECTORS.
17551   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
17552       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17553       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
17554     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
17555     ArrayRef<int> Mask = SVN->getMask();
17556 
17557     SDValue X = Vec.getOperand(0);
17558     SDValue Y = Vec.getOperand(1);
17559 
17560     // Vec's operand 0 is using indices from 0 to N-1 and
17561     // operand 1 from N to 2N - 1, where N is the number of
17562     // elements in the vectors.
17563     SDValue InsertVal0 = InsertVal.getOperand(0);
17564     int ElementOffset = -1;
17565 
17566     // We explore the inputs of the shuffle in order to see if we find the
17567     // source of the extract_vector_elt. If so, we can use it to modify the
17568     // shuffle rather than perform an insert_vector_elt.
17569     SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
17570     ArgWorkList.emplace_back(Mask.size(), Y);
17571     ArgWorkList.emplace_back(0, X);
17572 
17573     while (!ArgWorkList.empty()) {
17574       int ArgOffset;
17575       SDValue ArgVal;
17576       std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
17577 
17578       if (ArgVal == InsertVal0) {
17579         ElementOffset = ArgOffset;
17580         break;
17581       }
17582 
17583       // Peek through concat_vector.
17584       if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
17585         int CurrentArgOffset =
17586             ArgOffset + ArgVal.getValueType().getVectorNumElements();
17587         int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
17588         for (SDValue Op : reverse(ArgVal->ops())) {
17589           CurrentArgOffset -= Step;
17590           ArgWorkList.emplace_back(CurrentArgOffset, Op);
17591         }
17592 
17593         // Make sure we went through all the elements and did not screw up index
17594         // computation.
17595         assert(CurrentArgOffset == ArgOffset);
17596       }
17597     }
17598 
17599     if (ElementOffset != -1) {
17600       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
17601 
17602       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
17603       NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
17604       assert(NewMask[InsIndex] <
17605                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
17606              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
17607 
17608       SDValue LegalShuffle =
17609               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
17610                                           Y, NewMask, DAG);
17611       if (LegalShuffle)
17612         return LegalShuffle;
17613     }
17614   }
17615 
17616   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
17617   // bitcast(shuffle (bitcast V), (extended X), Mask)
17618   // Note: We do not use an insert_subvector node because that requires a
17619   // legal subvector type.
17620   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
17621       !InsertVal.getOperand(0).getValueType().isVector())
17622     return SDValue();
17623 
17624   SDValue SubVec = InsertVal.getOperand(0);
17625   SDValue DestVec = N->getOperand(0);
17626   EVT SubVecVT = SubVec.getValueType();
17627   EVT VT = DestVec.getValueType();
17628   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
17629   // If the source only has a single vector element, the cost of creating adding
17630   // it to a vector is likely to exceed the cost of a insert_vector_elt.
17631   if (NumSrcElts == 1)
17632     return SDValue();
17633   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
17634   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
17635 
17636   // Step 1: Create a shuffle mask that implements this insert operation. The
17637   // vector that we are inserting into will be operand 0 of the shuffle, so
17638   // those elements are just 'i'. The inserted subvector is in the first
17639   // positions of operand 1 of the shuffle. Example:
17640   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
17641   SmallVector<int, 16> Mask(NumMaskVals);
17642   for (unsigned i = 0; i != NumMaskVals; ++i) {
17643     if (i / NumSrcElts == InsIndex)
17644       Mask[i] = (i % NumSrcElts) + NumMaskVals;
17645     else
17646       Mask[i] = i;
17647   }
17648 
17649   // Bail out if the target can not handle the shuffle we want to create.
17650   EVT SubVecEltVT = SubVecVT.getVectorElementType();
17651   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
17652   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
17653     return SDValue();
17654 
17655   // Step 2: Create a wide vector from the inserted source vector by appending
17656   // undefined elements. This is the same size as our destination vector.
17657   SDLoc DL(N);
17658   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
17659   ConcatOps[0] = SubVec;
17660   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
17661 
17662   // Step 3: Shuffle in the padded subvector.
17663   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
17664   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
17665   AddToWorklist(PaddedSubV.getNode());
17666   AddToWorklist(DestVecBC.getNode());
17667   AddToWorklist(Shuf.getNode());
17668   return DAG.getBitcast(VT, Shuf);
17669 }
17670 
// Combine an INSERT_VECTOR_ELT node. Note: the folds below are tried in a
// deliberate order; the first applicable one wins.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc DL(N);

  EVT VT = InVec.getValueType();
  // IndexC is null when the insertion index is not a constant.
  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);

  // Insert into out-of-bounds element is undefined.
  if (IndexC && VT.isFixedLengthVector() &&
      IndexC->getZExtValue() >= VT.getVectorNumElements())
    return DAG.getUNDEF(VT);

  // Remove redundant insertions:
  // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
    return InVec;

  if (!IndexC) {
    // If this is variable insert to undef vector, it might be better to splat:
    // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
    if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
      if (VT.isScalableVector())
        return DAG.getSplatVector(VT, DL, InVal);
      else {
        SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
        return DAG.getBuildVector(VT, DL, Ops);
      }
    }
    // No further folds apply to a variable index.
    return SDValue();
  }

  // Everything below requires a fixed-width vector.
  if (VT.isScalableVector())
    return SDValue();

  unsigned NumElts = VT.getVectorNumElements();

  // We must know which element is being inserted for folds below here.
  unsigned Elt = IndexC->getZExtValue();
  if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
    return Shuf;

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt = InVec.getConstantOperandVal(2);
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.isUndef()) {
    // An undef vector acts like a BUILD_VECTOR of all-undef elements.
    Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }
  assert(Ops.size() == NumElts && "Unexpected vector size");

  // Insert the element
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
  }

  // Return the new vector
  return DAG.getBuildVector(VT, DL, Ops);
}
17766 
// Narrow (extract_vector_elt (load vec), idx) into a scalar load of just the
// extracted element, replacing both the extract and the original load's chain
// result. Assumes the extract is effectively the only consumer of the load
// (see the replacement comment below).
SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                                  SDValue EltNo,
                                                  LoadSDNode *OriginalLoad) {
  assert(OriginalLoad->isSimple());

  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  Align Alignment = OriginalLoad->getAlign();
  // ABI alignment required for a scalar load of the element type.
  Align NewAlign = DAG.getDataLayout().getABITypeAlign(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  // Bail if the narrow load would need more alignment than the original load
  // provides, or if the target can't load the element type at all.
  if (NewAlign > Alignment ||
      !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
    ISD::NON_EXTLOAD : ISD::EXTLOAD;
  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
    return SDValue();

  Alignment = NewAlign;

  // Compute the address of the extracted element: base + index * elt size.
  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: fold the byte offset and keep precise pointer info.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: compute the offset in the DAG.
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    // Discard the pointer info except the address space because the memory
    // operand can't represent this new access since the offset is variable.
    MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
  }
  NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
                          OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
                          Alignment, OriginalLoad->getMemOperand()->getFlags(),
                          OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Result is the same width or narrower: plain load, then adjust the type.
    Load = DAG.getLoad(
        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
        OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }
  // Replace both the extract's value and the original load's chain output.
  WorklistRemover DeadNodes(*this);
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklistWithUsers(Load.getNode());
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}
17852 
17853 /// Transform a vector binary operation into a scalar binary operation by moving
17854 /// the math/logic after an extract element of a vector.
17855 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
17856                                        bool LegalOperations) {
17857   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17858   SDValue Vec = ExtElt->getOperand(0);
17859   SDValue Index = ExtElt->getOperand(1);
17860   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17861   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
17862       Vec.getNode()->getNumValues() != 1)
17863     return SDValue();
17864 
17865   // Targets may want to avoid this to prevent an expensive register transfer.
17866   if (!TLI.shouldScalarizeBinop(Vec))
17867     return SDValue();
17868 
17869   // Extracting an element of a vector constant is constant-folded, so this
17870   // transform is just replacing a vector op with a scalar op while moving the
17871   // extract.
17872   SDValue Op0 = Vec.getOperand(0);
17873   SDValue Op1 = Vec.getOperand(1);
17874   if (isAnyConstantBuildVector(Op0, true) ||
17875       isAnyConstantBuildVector(Op1, true)) {
17876     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
17877     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
17878     SDLoc DL(ExtElt);
17879     EVT VT = ExtElt->getValueType(0);
17880     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
17881     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
17882     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
17883   }
17884 
17885   return SDValue();
17886 }
17887 
17888 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
17889   SDValue VecOp = N->getOperand(0);
17890   SDValue Index = N->getOperand(1);
17891   EVT ScalarVT = N->getValueType(0);
17892   EVT VecVT = VecOp.getValueType();
17893   if (VecOp.isUndef())
17894     return DAG.getUNDEF(ScalarVT);
17895 
17896   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
17897   //
17898   // This only really matters if the index is non-constant since other combines
17899   // on the constant elements already work.
17900   SDLoc DL(N);
17901   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
17902       Index == VecOp.getOperand(2)) {
17903     SDValue Elt = VecOp.getOperand(1);
17904     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
17905   }
17906 
17907   // (vextract (scalar_to_vector val, 0) -> val
17908   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17909     // Only 0'th element of SCALAR_TO_VECTOR is defined.
17910     if (DAG.isKnownNeverZero(Index))
17911       return DAG.getUNDEF(ScalarVT);
17912 
17913     // Check if the result type doesn't match the inserted element type. A
17914     // SCALAR_TO_VECTOR may truncate the inserted element and the
17915     // EXTRACT_VECTOR_ELT may widen the extracted vector.
17916     SDValue InOp = VecOp.getOperand(0);
17917     if (InOp.getValueType() != ScalarVT) {
17918       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
17919       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
17920     }
17921     return InOp;
17922   }
17923 
17924   // extract_vector_elt of out-of-bounds element -> UNDEF
17925   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17926   if (IndexC && VecVT.isFixedLengthVector() &&
17927       IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
17928     return DAG.getUNDEF(ScalarVT);
17929 
17930   // extract_vector_elt (build_vector x, y), 1 -> y
17931   if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
17932        VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
17933       TLI.isTypeLegal(VecVT) &&
17934       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
17935     assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
17936             VecVT.isFixedLengthVector()) &&
17937            "BUILD_VECTOR used for scalable vectors");
17938     unsigned IndexVal =
17939         VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
17940     SDValue Elt = VecOp.getOperand(IndexVal);
17941     EVT InEltVT = Elt.getValueType();
17942 
17943     // Sometimes build_vector's scalar input types do not match result type.
17944     if (ScalarVT == InEltVT)
17945       return Elt;
17946 
17947     // TODO: It may be useful to truncate if free if the build_vector implicitly
17948     // converts.
17949   }
17950 
17951   if (VecVT.isScalableVector())
17952     return SDValue();
17953 
17954   // All the code from this point onwards assumes fixed width vectors, but it's
17955   // possible that some of the combinations could be made to work for scalable
17956   // vectors too.
17957   unsigned NumElts = VecVT.getVectorNumElements();
17958   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
17959 
17960   // TODO: These transforms should not require the 'hasOneUse' restriction, but
17961   // there are regressions on multiple targets without it. We can end up with a
17962   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
17963   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
17964       VecOp.hasOneUse()) {
17965     // The vector index of the LSBs of the source depend on the endian-ness.
17966     bool IsLE = DAG.getDataLayout().isLittleEndian();
17967     unsigned ExtractIndex = IndexC->getZExtValue();
17968     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
17969     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
17970     SDValue BCSrc = VecOp.getOperand(0);
17971     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
17972       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
17973 
17974     if (LegalTypes && BCSrc.getValueType().isInteger() &&
17975         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17976       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
17977       // trunc i64 X to i32
17978       SDValue X = BCSrc.getOperand(0);
17979       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
17980              "Extract element and scalar to vector can't change element type "
17981              "from FP to integer.");
17982       unsigned XBitWidth = X.getValueSizeInBits();
17983       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
17984 
17985       // An extract element return value type can be wider than its vector
17986       // operand element type. In that case, the high bits are undefined, so
17987       // it's possible that we may need to extend rather than truncate.
17988       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
17989         assert(XBitWidth % VecEltBitWidth == 0 &&
17990                "Scalar bitwidth must be a multiple of vector element bitwidth");
17991         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
17992       }
17993     }
17994   }
17995 
17996   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
17997     return BO;
17998 
17999   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
18000   // We only perform this optimization before the op legalization phase because
18001   // we may introduce new vector instructions which are not backed by TD
18002   // patterns. For example on AVX, extracting elements from a wide vector
18003   // without using extract_subvector. However, if we can find an underlying
18004   // scalar value, then we can always use that.
18005   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
18006     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
18007     // Find the new index to extract from.
18008     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
18009 
18010     // Extracting an undef index is undef.
18011     if (OrigElt == -1)
18012       return DAG.getUNDEF(ScalarVT);
18013 
18014     // Select the right vector half to extract from.
18015     SDValue SVInVec;
18016     if (OrigElt < (int)NumElts) {
18017       SVInVec = VecOp.getOperand(0);
18018     } else {
18019       SVInVec = VecOp.getOperand(1);
18020       OrigElt -= NumElts;
18021     }
18022 
18023     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
18024       SDValue InOp = SVInVec.getOperand(OrigElt);
18025       if (InOp.getValueType() != ScalarVT) {
18026         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18027         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18028       }
18029 
18030       return InOp;
18031     }
18032 
18033     // FIXME: We should handle recursing on other vector shuffles and
18034     // scalar_to_vector here as well.
18035 
18036     if (!LegalOperations ||
18037         // FIXME: Should really be just isOperationLegalOrCustom.
18038         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
18039         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
18040       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
18041                          DAG.getVectorIdxConstant(OrigElt, DL));
18042     }
18043   }
18044 
18045   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
18046   // simplify it based on the (valid) extraction indices.
18047   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
18048         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18049                Use->getOperand(0) == VecOp &&
18050                isa<ConstantSDNode>(Use->getOperand(1));
18051       })) {
18052     APInt DemandedElts = APInt::getNullValue(NumElts);
18053     for (SDNode *Use : VecOp->uses()) {
18054       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
18055       if (CstElt->getAPIntValue().ult(NumElts))
18056         DemandedElts.setBit(CstElt->getZExtValue());
18057     }
18058     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
18059       // We simplified the vector operand of this extract element. If this
18060       // extract is not dead, visit it again so it is folded properly.
18061       if (N->getOpcode() != ISD::DELETED_NODE)
18062         AddToWorklist(N);
18063       return SDValue(N, 0);
18064     }
18065     APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
18066     if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
18067       // We simplified the vector operand of this extract element. If this
18068       // extract is not dead, visit it again so it is folded properly.
18069       if (N->getOpcode() != ISD::DELETED_NODE)
18070         AddToWorklist(N);
18071       return SDValue(N, 0);
18072     }
18073   }
18074 
18075   // Everything under here is trying to match an extract of a loaded value.
18076   // If the result of load has to be truncated, then it's not necessarily
18077   // profitable.
18078   bool BCNumEltsChanged = false;
18079   EVT ExtVT = VecVT.getVectorElementType();
18080   EVT LVT = ExtVT;
18081   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
18082     return SDValue();
18083 
18084   if (VecOp.getOpcode() == ISD::BITCAST) {
18085     // Don't duplicate a load with other uses.
18086     if (!VecOp.hasOneUse())
18087       return SDValue();
18088 
18089     EVT BCVT = VecOp.getOperand(0).getValueType();
18090     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
18091       return SDValue();
18092     if (NumElts != BCVT.getVectorNumElements())
18093       BCNumEltsChanged = true;
18094     VecOp = VecOp.getOperand(0);
18095     ExtVT = BCVT.getVectorElementType();
18096   }
18097 
18098   // extract (vector load $addr), i --> load $addr + i * size
18099   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
18100       ISD::isNormalLoad(VecOp.getNode()) &&
18101       !Index->hasPredecessor(VecOp.getNode())) {
18102     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
18103     if (VecLoad && VecLoad->isSimple())
18104       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
18105   }
18106 
18107   // Perform only after legalization to ensure build_vector / vector_shuffle
18108   // optimizations have already been done.
18109   if (!LegalOperations || !IndexC)
18110     return SDValue();
18111 
18112   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
18113   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
18114   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
18115   int Elt = IndexC->getZExtValue();
18116   LoadSDNode *LN0 = nullptr;
18117   if (ISD::isNormalLoad(VecOp.getNode())) {
18118     LN0 = cast<LoadSDNode>(VecOp);
18119   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
18120              VecOp.getOperand(0).getValueType() == ExtVT &&
18121              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
18122     // Don't duplicate a load with other uses.
18123     if (!VecOp.hasOneUse())
18124       return SDValue();
18125 
18126     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
18127   }
18128   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
18129     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
18130     // =>
18131     // (load $addr+1*size)
18132 
18133     // Don't duplicate a load with other uses.
18134     if (!VecOp.hasOneUse())
18135       return SDValue();
18136 
18137     // If the bit convert changed the number of elements, it is unsafe
18138     // to examine the mask.
18139     if (BCNumEltsChanged)
18140       return SDValue();
18141 
18142     // Select the input vector, guarding against out of range extract vector.
18143     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
18144     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
18145 
18146     if (VecOp.getOpcode() == ISD::BITCAST) {
18147       // Don't duplicate a load with other uses.
18148       if (!VecOp.hasOneUse())
18149         return SDValue();
18150 
18151       VecOp = VecOp.getOperand(0);
18152     }
18153     if (ISD::isNormalLoad(VecOp.getNode())) {
18154       LN0 = cast<LoadSDNode>(VecOp);
18155       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
18156       Index = DAG.getConstant(Elt, DL, Index.getValueType());
18157     }
18158   } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
18159              VecVT.getVectorElementType() == ScalarVT &&
18160              (!LegalTypes ||
18161               TLI.isTypeLegal(
18162                   VecOp.getOperand(0).getValueType().getVectorElementType()))) {
18163     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
18164     //      -> extract_vector_elt a, 0
18165     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
18166     //      -> extract_vector_elt a, 1
18167     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
18168     //      -> extract_vector_elt b, 0
18169     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
18170     //      -> extract_vector_elt b, 1
18171     SDLoc SL(N);
18172     EVT ConcatVT = VecOp.getOperand(0).getValueType();
18173     unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
18174     SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
18175                                      Index.getValueType());
18176 
18177     SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
18178     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
18179                               ConcatVT.getVectorElementType(),
18180                               ConcatOp, NewIdx);
18181     return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
18182   }
18183 
18184   // Make sure we found a non-volatile load and the extractelement is
18185   // the only use.
18186   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
18187     return SDValue();
18188 
18189   // If Idx was -1 above, Elt is going to be -1, so just return undef.
18190   if (Elt == -1)
18191     return DAG.getUNDEF(LVT);
18192 
18193   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
18194 }
18195 
18196 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
18197 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
18198   // We perform this optimization post type-legalization because
18199   // the type-legalizer often scalarizes integer-promoted vectors.
18200   // Performing this optimization before may create bit-casts which
18201   // will be type-legalized to complex code sequences.
18202   // We perform this optimization only before the operation legalizer because we
18203   // may introduce illegal operations.
18204   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
18205     return SDValue();
18206 
18207   unsigned NumInScalars = N->getNumOperands();
18208   SDLoc DL(N);
18209   EVT VT = N->getValueType(0);
18210 
18211   // Check to see if this is a BUILD_VECTOR of a bunch of values
18212   // which come from any_extend or zero_extend nodes. If so, we can create
18213   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
18214   // optimizations. We do not handle sign-extend because we can't fill the sign
18215   // using shuffles.
18216   EVT SourceType = MVT::Other;
18217   bool AllAnyExt = true;
18218 
18219   for (unsigned i = 0; i != NumInScalars; ++i) {
18220     SDValue In = N->getOperand(i);
18221     // Ignore undef inputs.
18222     if (In.isUndef()) continue;
18223 
18224     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
18225     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
18226 
18227     // Abort if the element is not an extension.
18228     if (!ZeroExt && !AnyExt) {
18229       SourceType = MVT::Other;
18230       break;
18231     }
18232 
18233     // The input is a ZeroExt or AnyExt. Check the original type.
18234     EVT InTy = In.getOperand(0).getValueType();
18235 
18236     // Check that all of the widened source types are the same.
18237     if (SourceType == MVT::Other)
18238       // First time.
18239       SourceType = InTy;
18240     else if (InTy != SourceType) {
18241       // Multiple income types. Abort.
18242       SourceType = MVT::Other;
18243       break;
18244     }
18245 
18246     // Check if all of the extends are ANY_EXTENDs.
18247     AllAnyExt &= AnyExt;
18248   }
18249 
18250   // In order to have valid types, all of the inputs must be extended from the
18251   // same source type and all of the inputs must be any or zero extend.
18252   // Scalar sizes must be a power of two.
18253   EVT OutScalarTy = VT.getScalarType();
18254   bool ValidTypes = SourceType != MVT::Other &&
18255                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
18256                  isPowerOf2_32(SourceType.getSizeInBits());
18257 
18258   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
18259   // turn into a single shuffle instruction.
18260   if (!ValidTypes)
18261     return SDValue();
18262 
18263   // If we already have a splat buildvector, then don't fold it if it means
18264   // introducing zeros.
18265   if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
18266     return SDValue();
18267 
18268   bool isLE = DAG.getDataLayout().isLittleEndian();
18269   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
18270   assert(ElemRatio > 1 && "Invalid element size ratio");
18271   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
18272                                DAG.getConstant(0, DL, SourceType);
18273 
18274   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
18275   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
18276 
18277   // Populate the new build_vector
18278   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
18279     SDValue Cast = N->getOperand(i);
18280     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
18281             Cast.getOpcode() == ISD::ZERO_EXTEND ||
18282             Cast.isUndef()) && "Invalid cast opcode");
18283     SDValue In;
18284     if (Cast.isUndef())
18285       In = DAG.getUNDEF(SourceType);
18286     else
18287       In = Cast->getOperand(0);
18288     unsigned Index = isLE ? (i * ElemRatio) :
18289                             (i * ElemRatio + (ElemRatio - 1));
18290 
18291     assert(Index < Ops.size() && "Invalid index");
18292     Ops[Index] = In;
18293   }
18294 
18295   // The type of the new BUILD_VECTOR node.
18296   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
18297   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
18298          "Invalid vector size");
18299   // Check if the new vector type is legal.
18300   if (!isTypeLegal(VecVT) ||
18301       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
18302        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
18303     return SDValue();
18304 
18305   // Make the new BUILD_VECTOR.
18306   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
18307 
18308   // The new BUILD_VECTOR node has the potential to be further optimized.
18309   AddToWorklist(BV.getNode());
18310   // Bitcast to the desired type.
18311   return DAG.getBitcast(VT, BV);
18312 }
18313 
18314 // Simplify (build_vec (trunc $1)
18315 //                     (trunc (srl $1 half-width))
18316 //                     (trunc (srl $1 (2 * half-width))) …)
18317 // to (bitcast $1)
18318 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
18319   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
18320 
18321   // Only for little endian
18322   if (!DAG.getDataLayout().isLittleEndian())
18323     return SDValue();
18324 
18325   SDLoc DL(N);
18326   EVT VT = N->getValueType(0);
18327   EVT OutScalarTy = VT.getScalarType();
18328   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
18329 
18330   // Only for power of two types to be sure that bitcast works well
18331   if (!isPowerOf2_64(ScalarTypeBitsize))
18332     return SDValue();
18333 
18334   unsigned NumInScalars = N->getNumOperands();
18335 
18336   // Look through bitcasts
18337   auto PeekThroughBitcast = [](SDValue Op) {
18338     if (Op.getOpcode() == ISD::BITCAST)
18339       return Op.getOperand(0);
18340     return Op;
18341   };
18342 
18343   // The source value where all the parts are extracted.
18344   SDValue Src;
18345   for (unsigned i = 0; i != NumInScalars; ++i) {
18346     SDValue In = PeekThroughBitcast(N->getOperand(i));
18347     // Ignore undef inputs.
18348     if (In.isUndef()) continue;
18349 
18350     if (In.getOpcode() != ISD::TRUNCATE)
18351       return SDValue();
18352 
18353     In = PeekThroughBitcast(In.getOperand(0));
18354 
18355     if (In.getOpcode() != ISD::SRL) {
18356       // For now only build_vec without shuffling, handle shifts here in the
18357       // future.
18358       if (i != 0)
18359         return SDValue();
18360 
18361       Src = In;
18362     } else {
18363       // In is SRL
18364       SDValue part = PeekThroughBitcast(In.getOperand(0));
18365 
18366       if (!Src) {
18367         Src = part;
18368       } else if (Src != part) {
18369         // Vector parts do not stem from the same variable
18370         return SDValue();
18371       }
18372 
18373       SDValue ShiftAmtVal = In.getOperand(1);
18374       if (!isa<ConstantSDNode>(ShiftAmtVal))
18375         return SDValue();
18376 
18377       uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
18378 
18379       // The extracted value is not extracted at the right position
18380       if (ShiftAmt != i * ScalarTypeBitsize)
18381         return SDValue();
18382     }
18383   }
18384 
18385   // Only cast if the size is the same
18386   if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
18387     return SDValue();
18388 
18389   return DAG.getBitcast(VT, Src);
18390 }
18391 
// Build a VECTOR_SHUFFLE that reproduces the elements of the BUILD_VECTOR N
// that come from the pair of input vectors (VecIn1, VecIn2).
// VectorMask maps each BUILD_VECTOR element to an input vector number (-1 for
// undef, 0 for the zero vector, >0 for real inputs); LeftIdx is the vector
// number assigned to VecIn1, and LeftIdx+1 the one assigned to VecIn2.
// DidSplitVec indicates the caller split one original vector into
// VecIn1/VecIn2, in which case extract indices are already relative to the
// combined vector. Returns an empty SDValue if the input/output vector types
// cannot be reconciled.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx, bool DidSplitVec) {
  SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  // VecIn2 may be absent; treat it as having VecIn1's type in that case.
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  unsigned NumElems = VT.getVectorNumElements();
  // May grow below if we shuffle at a wider-than-VT width.
  unsigned ShuffleNumElems = NumElems;

  // If we artificially split a vector in two already, then the offsets in the
  // operands will all be based off of VecIn1, even those in VecIn2.
  unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      // Both inputs are now merged into VecIn1 at width VT.
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getVectorIdxConstant(NumElems, DL));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        // Shuffle at the wider InVT1 width; result is extracted back to VT
        // at the bottom of the function.
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      // VecIn1 already matches VT; widen the half-size VecIn2 to VT by
      // concatenating it with undef.
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    // Skip undef (-1) and zero-vector (0) elements; only this pair's
    // vector numbers (LeftIdx, LeftIdx+1) contribute mask entries.
    if (VectorMask[i] <= 0)
      continue;

    // Operand i is an EXTRACT_VECTOR_ELT; operand 1 is its constant index
    // (guaranteed by the caller, reduceBuildVecToShuffle).
    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  // If we shuffled at a wider width, extract the low VT-sized part.
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}
18506 
18507 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
18508   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
18509 
18510   // First, determine where the build vector is not undef.
18511   // TODO: We could extend this to handle zero elements as well as undefs.
18512   int NumBVOps = BV->getNumOperands();
18513   int ZextElt = -1;
18514   for (int i = 0; i != NumBVOps; ++i) {
18515     SDValue Op = BV->getOperand(i);
18516     if (Op.isUndef())
18517       continue;
18518     if (ZextElt == -1)
18519       ZextElt = i;
18520     else
18521       return SDValue();
18522   }
18523   // Bail out if there's no non-undef element.
18524   if (ZextElt == -1)
18525     return SDValue();
18526 
18527   // The build vector contains some number of undef elements and exactly
18528   // one other element. That other element must be a zero-extended scalar
18529   // extracted from a vector at a constant index to turn this into a shuffle.
18530   // Also, require that the build vector does not implicitly truncate/extend
18531   // its elements.
18532   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
18533   EVT VT = BV->getValueType(0);
18534   SDValue Zext = BV->getOperand(ZextElt);
18535   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
18536       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
18537       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
18538       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
18539     return SDValue();
18540 
18541   // The zero-extend must be a multiple of the source size, and we must be
18542   // building a vector of the same size as the source of the extract element.
18543   SDValue Extract = Zext.getOperand(0);
18544   unsigned DestSize = Zext.getValueSizeInBits();
18545   unsigned SrcSize = Extract.getValueSizeInBits();
18546   if (DestSize % SrcSize != 0 ||
18547       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
18548     return SDValue();
18549 
18550   // Create a shuffle mask that will combine the extracted element with zeros
18551   // and undefs.
18552   int ZextRatio = DestSize / SrcSize;
18553   int NumMaskElts = NumBVOps * ZextRatio;
18554   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
18555   for (int i = 0; i != NumMaskElts; ++i) {
18556     if (i / ZextRatio == ZextElt) {
18557       // The low bits of the (potentially translated) extracted element map to
18558       // the source vector. The high bits map to zero. We will use a zero vector
18559       // as the 2nd source operand of the shuffle, so use the 1st element of
18560       // that vector (mask value is number-of-elements) for the high bits.
18561       if (i % ZextRatio == 0)
18562         ShufMask[i] = Extract.getConstantOperandVal(1);
18563       else
18564         ShufMask[i] = NumMaskElts;
18565     }
18566 
18567     // Undef elements of the build vector remain undef because we initialize
18568     // the shuffle mask with -1.
18569   }
18570 
18571   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
18572   // bitcast (shuffle V, ZeroVec, VectorMask)
18573   SDLoc DL(BV);
18574   EVT VecVT = Extract.getOperand(0).getValueType();
18575   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
18576   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18577   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
18578                                              ZeroVec, ShufMask, DAG);
18579   if (!Shuf)
18580     return SDValue();
18581   return DAG.getBitcast(VT, Shuf);
18582 }
18583 
18584 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
18585 // operations. If the types of the vectors we're extracting from allow it,
18586 // turn this into a vector_shuffle node.
18587 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
18588   SDLoc DL(N);
18589   EVT VT = N->getValueType(0);
18590 
18591   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
18592   if (!isTypeLegal(VT))
18593     return SDValue();
18594 
18595   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
18596     return V;
18597 
18598   // May only combine to shuffle after legalize if shuffle is legal.
18599   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
18600     return SDValue();
18601 
18602   bool UsesZeroVector = false;
18603   unsigned NumElems = N->getNumOperands();
18604 
18605   // Record, for each element of the newly built vector, which input vector
18606   // that element comes from. -1 stands for undef, 0 for the zero vector,
18607   // and positive values for the input vectors.
18608   // VectorMask maps each element to its vector number, and VecIn maps vector
18609   // numbers to their initial SDValues.
18610 
18611   SmallVector<int, 8> VectorMask(NumElems, -1);
18612   SmallVector<SDValue, 8> VecIn;
18613   VecIn.push_back(SDValue());
18614 
18615   for (unsigned i = 0; i != NumElems; ++i) {
18616     SDValue Op = N->getOperand(i);
18617 
18618     if (Op.isUndef())
18619       continue;
18620 
18621     // See if we can use a blend with a zero vector.
18622     // TODO: Should we generalize this to a blend with an arbitrary constant
18623     // vector?
18624     if (isNullConstant(Op) || isNullFPConstant(Op)) {
18625       UsesZeroVector = true;
18626       VectorMask[i] = 0;
18627       continue;
18628     }
18629 
18630     // Not an undef or zero. If the input is something other than an
18631     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
18632     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
18633         !isa<ConstantSDNode>(Op.getOperand(1)))
18634       return SDValue();
18635     SDValue ExtractedFromVec = Op.getOperand(0);
18636 
18637     if (ExtractedFromVec.getValueType().isScalableVector())
18638       return SDValue();
18639 
18640     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
18641     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
18642       return SDValue();
18643 
18644     // All inputs must have the same element type as the output.
18645     if (VT.getVectorElementType() !=
18646         ExtractedFromVec.getValueType().getVectorElementType())
18647       return SDValue();
18648 
18649     // Have we seen this input vector before?
18650     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
18651     // a map back from SDValues to numbers isn't worth it.
18652     unsigned Idx = std::distance(
18653         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
18654     if (Idx == VecIn.size())
18655       VecIn.push_back(ExtractedFromVec);
18656 
18657     VectorMask[i] = Idx;
18658   }
18659 
18660   // If we didn't find at least one input vector, bail out.
18661   if (VecIn.size() < 2)
18662     return SDValue();
18663 
18664   // If all the Operands of BUILD_VECTOR extract from same
18665   // vector, then split the vector efficiently based on the maximum
18666   // vector access index and adjust the VectorMask and
18667   // VecIn accordingly.
18668   bool DidSplitVec = false;
18669   if (VecIn.size() == 2) {
18670     unsigned MaxIndex = 0;
18671     unsigned NearestPow2 = 0;
18672     SDValue Vec = VecIn.back();
18673     EVT InVT = Vec.getValueType();
18674     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
18675 
18676     for (unsigned i = 0; i < NumElems; i++) {
18677       if (VectorMask[i] <= 0)
18678         continue;
18679       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
18680       IndexVec[i] = Index;
18681       MaxIndex = std::max(MaxIndex, Index);
18682     }
18683 
18684     NearestPow2 = PowerOf2Ceil(MaxIndex);
18685     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
18686         NumElems * 2 < NearestPow2) {
18687       unsigned SplitSize = NearestPow2 / 2;
18688       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
18689                                      InVT.getVectorElementType(), SplitSize);
18690       if (TLI.isTypeLegal(SplitVT)) {
18691         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
18692                                      DAG.getVectorIdxConstant(SplitSize, DL));
18693         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
18694                                      DAG.getVectorIdxConstant(0, DL));
18695         VecIn.pop_back();
18696         VecIn.push_back(VecIn1);
18697         VecIn.push_back(VecIn2);
18698         DidSplitVec = true;
18699 
18700         for (unsigned i = 0; i < NumElems; i++) {
18701           if (VectorMask[i] <= 0)
18702             continue;
18703           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
18704         }
18705       }
18706     }
18707   }
18708 
18709   // TODO: We want to sort the vectors by descending length, so that adjacent
18710   // pairs have similar length, and the longer vector is always first in the
18711   // pair.
18712 
18713   // TODO: Should this fire if some of the input vectors has illegal type (like
18714   // it does now), or should we let legalization run its course first?
18715 
18716   // Shuffle phase:
18717   // Take pairs of vectors, and shuffle them so that the result has elements
18718   // from these vectors in the correct places.
18719   // For example, given:
18720   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
18721   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
18722   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
18723   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
18724   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
18725   // We will generate:
18726   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
18727   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
18728   SmallVector<SDValue, 4> Shuffles;
18729   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
18730     unsigned LeftIdx = 2 * In + 1;
18731     SDValue VecLeft = VecIn[LeftIdx];
18732     SDValue VecRight =
18733         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
18734 
18735     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
18736                                                 VecRight, LeftIdx, DidSplitVec))
18737       Shuffles.push_back(Shuffle);
18738     else
18739       return SDValue();
18740   }
18741 
18742   // If we need the zero vector as an "ingredient" in the blend tree, add it
18743   // to the list of shuffles.
18744   if (UsesZeroVector)
18745     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
18746                                       : DAG.getConstantFP(0.0, DL, VT));
18747 
18748   // If we only have one shuffle, we're done.
18749   if (Shuffles.size() == 1)
18750     return Shuffles[0];
18751 
18752   // Update the vector mask to point to the post-shuffle vectors.
18753   for (int &Vec : VectorMask)
18754     if (Vec == 0)
18755       Vec = Shuffles.size() - 1;
18756     else
18757       Vec = (Vec - 1) / 2;
18758 
18759   // More than one shuffle. Generate a binary tree of blends, e.g. if from
18760   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
18761   // generate:
18762   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
18763   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
18764   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
18765   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
18766   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
18767   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
18768   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
18769 
18770   // Make sure the initial size of the shuffle list is even.
18771   if (Shuffles.size() % 2)
18772     Shuffles.push_back(DAG.getUNDEF(VT));
18773 
18774   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
18775     if (CurSize % 2) {
18776       Shuffles[CurSize] = DAG.getUNDEF(VT);
18777       CurSize++;
18778     }
18779     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
18780       int Left = 2 * In;
18781       int Right = 2 * In + 1;
18782       SmallVector<int, 8> Mask(NumElems, -1);
18783       for (unsigned i = 0; i != NumElems; ++i) {
18784         if (VectorMask[i] == Left) {
18785           Mask[i] = i;
18786           VectorMask[i] = In;
18787         } else if (VectorMask[i] == Right) {
18788           Mask[i] = i + NumElems;
18789           VectorMask[i] = In;
18790         }
18791       }
18792 
18793       Shuffles[In] =
18794           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
18795     }
18796   }
18797   return Shuffles[0];
18798 }
18799 
18800 // Try to turn a build vector of zero extends of extract vector elts into a
18801 // a vector zero extend and possibly an extract subvector.
18802 // TODO: Support sign extend?
18803 // TODO: Allow undef elements?
18804 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
18805   if (LegalOperations)
18806     return SDValue();
18807 
18808   EVT VT = N->getValueType(0);
18809 
18810   bool FoundZeroExtend = false;
18811   SDValue Op0 = N->getOperand(0);
18812   auto checkElem = [&](SDValue Op) -> int64_t {
18813     unsigned Opc = Op.getOpcode();
18814     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
18815     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
18816         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18817         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
18818       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
18819         return C->getZExtValue();
18820     return -1;
18821   };
18822 
18823   // Make sure the first element matches
18824   // (zext (extract_vector_elt X, C))
18825   int64_t Offset = checkElem(Op0);
18826   if (Offset < 0)
18827     return SDValue();
18828 
18829   unsigned NumElems = N->getNumOperands();
18830   SDValue In = Op0.getOperand(0).getOperand(0);
18831   EVT InSVT = In.getValueType().getScalarType();
18832   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
18833 
18834   // Don't create an illegal input type after type legalization.
18835   if (LegalTypes && !TLI.isTypeLegal(InVT))
18836     return SDValue();
18837 
18838   // Ensure all the elements come from the same vector and are adjacent.
18839   for (unsigned i = 1; i != NumElems; ++i) {
18840     if ((Offset + i) != checkElem(N->getOperand(i)))
18841       return SDValue();
18842   }
18843 
18844   SDLoc DL(N);
18845   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
18846                    Op0.getOperand(0).getOperand(1));
18847   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
18848                      VT, In);
18849 }
18850 
/// Combine a BUILD_VECTOR node. Tries, in order: fold all-undef to UNDEF,
/// splat-of-vector-bitcast to concat_vectors, splat to SPLAT_VECTOR,
/// rewriting the build as a (sub)vector extract, and finally the
/// convertBuildVec*/reduceBuildVec* helper transforms.
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // If this is a splat of a bitcast from another vector, change to a
  // concat_vector.
  // For example:
  //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
  //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
  //
  // If X is a build_vector itself, the concat can become a larger build_vector.
  // TODO: Maybe this is useful for non-splat too?
  if (!LegalOperations) {
    if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
      // Look through bitcasts to find the underlying (possibly vector) value.
      Splat = peekThroughBitcasts(Splat);
      EVT SrcVT = Splat.getValueType();
      if (SrcVT.isVector()) {
        // The concat has one copy of the splatted source per build_vector
        // operand, so the element count scales accordingly.
        unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
        EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
                                     SrcVT.getVectorElementType(), NumElts);
        if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
          SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
          SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
                                       NewVT, Ops);
          return DAG.getBitcast(VT, Concat);
        }
      }
    }
  }

  // A splat of a single element is a SPLAT_VECTOR if supported on the target.
  if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
    if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
      assert(!V.isUndef() && "Splat of undef should have been handled earlier");
      return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
    }

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    // Return the constant lane index if Op is (extract_vector_elt Src, C)
    // with the same Src as operand 0; otherwise return -1, which as a
    // uint64_t is UINT64_MAX and so can never match a real (offset + i).
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    // NOTE: the uint64_t sentinel deliberately narrows to int -1 here; the
    // i == 0 iteration below then fails the comparison and bails out, so a
    // non-matching first operand is never dereferenced further.
    int Offset = checkElem(Op0);
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    // Extracting lanes [0, N) of a same-typed vector is just that vector.
    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  if (SDValue V = convertBuildVecZextToZext(N))
    return V;

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecTruncToBitCast(N))
    return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  return SDValue();
}
18934 
18935 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
18936   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18937   EVT OpVT = N->getOperand(0).getValueType();
18938 
18939   // If the operands are legal vectors, leave them alone.
18940   if (TLI.isTypeLegal(OpVT))
18941     return SDValue();
18942 
18943   SDLoc DL(N);
18944   EVT VT = N->getValueType(0);
18945   SmallVector<SDValue, 8> Ops;
18946 
18947   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
18948   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
18949 
18950   // Keep track of what we encounter.
18951   bool AnyInteger = false;
18952   bool AnyFP = false;
18953   for (const SDValue &Op : N->ops()) {
18954     if (ISD::BITCAST == Op.getOpcode() &&
18955         !Op.getOperand(0).getValueType().isVector())
18956       Ops.push_back(Op.getOperand(0));
18957     else if (ISD::UNDEF == Op.getOpcode())
18958       Ops.push_back(ScalarUndef);
18959     else
18960       return SDValue();
18961 
18962     // Note whether we encounter an integer or floating point scalar.
18963     // If it's neither, bail out, it could be something weird like x86mmx.
18964     EVT LastOpVT = Ops.back().getValueType();
18965     if (LastOpVT.isFloatingPoint())
18966       AnyFP = true;
18967     else if (LastOpVT.isInteger())
18968       AnyInteger = true;
18969     else
18970       return SDValue();
18971   }
18972 
18973   // If any of the operands is a floating point scalar bitcast to a vector,
18974   // use floating point types throughout, and bitcast everything.
18975   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
18976   if (AnyFP) {
18977     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
18978     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
18979     if (AnyInteger) {
18980       for (SDValue &Op : Ops) {
18981         if (Op.getValueType() == SVT)
18982           continue;
18983         if (Op.isUndef())
18984           Op = ScalarUndef;
18985         else
18986           Op = DAG.getBitcast(SVT, Op);
18987       }
18988     }
18989   }
18990 
18991   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
18992                                VT.getSizeInBits() / SVT.getSizeInBits());
18993   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
18994 }
18995 
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();

  // We currently can't generate an appropriate shuffle for a scalable vector.
  if (VT.isScalableVector())
    return SDValue();

  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 are the (at most two) distinct source vectors; Mask accumulates
  // NumOpElts entries per concat operand, in VT's element granularity.
  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    Op = peekThroughBitcasts(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);
    int ExtIdx = Op.getConstantOperandVal(1);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();
    ExtVec = peekThroughBitcasts(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    // ExtIdx is in ExtVT elements; convert it to VT elements. The two types
    // have the same bit width (checked above), so one element count divides
    // the other whenever the scaling is exact; otherwise bail out.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    // Second-input lanes are offset by NumElts, per shuffle mask convention.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  // Let the target decide if the shuffle (or a commuted/legalized variant)
  // is worthwhile; this may still return SDValue() if it is not legal.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                                     DAG.getBitcast(VT, SV1), Mask, DAG);
}
19073 
19074 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
19075   unsigned CastOpcode = N->getOperand(0).getOpcode();
19076   switch (CastOpcode) {
19077   case ISD::SINT_TO_FP:
19078   case ISD::UINT_TO_FP:
19079   case ISD::FP_TO_SINT:
19080   case ISD::FP_TO_UINT:
19081     // TODO: Allow more opcodes?
19082     //  case ISD::BITCAST:
19083     //  case ISD::TRUNCATE:
19084     //  case ISD::ZERO_EXTEND:
19085     //  case ISD::SIGN_EXTEND:
19086     //  case ISD::FP_EXTEND:
19087     break;
19088   default:
19089     return SDValue();
19090   }
19091 
19092   EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
19093   if (!SrcVT.isVector())
19094     return SDValue();
19095 
19096   // All operands of the concat must be the same kind of cast from the same
19097   // source type.
19098   SmallVector<SDValue, 4> SrcOps;
19099   for (SDValue Op : N->ops()) {
19100     if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
19101         Op.getOperand(0).getValueType() != SrcVT)
19102       return SDValue();
19103     SrcOps.push_back(Op.getOperand(0));
19104   }
19105 
19106   // The wider cast must be supported by the target. This is unusual because
19107   // the operation support type parameter depends on the opcode. In addition,
19108   // check the other type in the cast to make sure this is really legal.
19109   EVT VT = N->getValueType(0);
19110   EVT SrcEltVT = SrcVT.getVectorElementType();
19111   ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
19112   EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
19113   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19114   switch (CastOpcode) {
19115   case ISD::SINT_TO_FP:
19116   case ISD::UINT_TO_FP:
19117     if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
19118         !TLI.isTypeLegal(VT))
19119       return SDValue();
19120     break;
19121   case ISD::FP_TO_SINT:
19122   case ISD::FP_TO_UINT:
19123     if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
19124         !TLI.isTypeLegal(ConcatSrcVT))
19125       return SDValue();
19126     break;
19127   default:
19128     llvm_unreachable("Unexpected cast opcode");
19129   }
19130 
19131   // concat (cast X), (cast Y)... -> cast (concat X, Y...)
19132   SDLoc DL(N);
19133   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
19134   return DAG.getNode(CastOpcode, DL, VT, NewConcat);
19135 }
19136 
/// Combine a CONCAT_VECTORS node. Handles trivial folds (single operand,
/// all-undef), concat-with-trailing-undefs, merging BUILD_VECTOR operands,
/// the combineConcatVectorOf* helpers, and recognizing an identity
/// re-concatenation of extract_subvectors from a single source.
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // If the input is a concat_vectors, just make a larger concat by padding
    // with smaller undefs.
    if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
      unsigned NumOps = N->getNumOperands() * In.getNumOperands();
      SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
      Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
    }

    SDValue Scalar = peekThroughOneUseBitcasts(In);

    // concat_vectors(scalar_to_vector(scalar), undef) ->
    //     scalar_to_vector(scalar)
    if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
         Scalar.hasOneUse()) {
      EVT SVT = Scalar.getValueType().getVectorElementType();
      // Only strip the scalar_to_vector if no implicit truncation is lost.
      if (SVT == Scalar.getOperand(0).getValueType())
        Scalar = Scalar.getOperand(0);
    }

    // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
    if (!Scalar.getValueType().isVector()) {
      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar.getValueType();

      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Bail out if the vector size is not a multiple of the scalar size.
      if (VT.getSizeInBits() % SclTy.getSizeInBits())
        return SDValue();

      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      // Both the intermediate scalar_to_vector type and the scalar type must
      // be legal for the transform to be profitable here.
      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0).getValueType();
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      // An undef operand contributes NumElts undef scalars.
      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          // Integer scalars may be wider than MinVT; truncate each one.
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  if (SDValue V = combineConcatVectorOfCasts(N, DAG))
    return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
  // operands and look for a CONCAT operations that place the incoming vectors
  // at the exact same location.
  //
  // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem =
      N->getOperand(0).getValueType().getVectorMinNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    // Undef operands match any source lane; skip them.
    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    // Check that we are reading from the identity index.
    unsigned IdentityIndex = i * PartNumElem;
    if (Op.getConstantOperandAPInt(1) != IdentityIndex)
      return SDValue();
  }

  // Every operand was either undef or the identity extract of the same
  // source, so the concat is a no-op.
  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
19312 
19313 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
19314 // if the subvector can be sourced for free.
19315 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
19316   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
19317       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
19318     return V.getOperand(1);
19319   }
19320   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19321   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
19322       V.getOperand(0).getValueType() == SubVT &&
19323       (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
19324     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
19325     return V.getOperand(SubIdx);
19326   }
19327   return SDValue();
19328 }
19329 
19330 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
19331                                               SelectionDAG &DAG,
19332                                               bool LegalOperations) {
19333   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19334   SDValue BinOp = Extract->getOperand(0);
19335   unsigned BinOpcode = BinOp.getOpcode();
19336   if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
19337     return SDValue();
19338 
19339   EVT VecVT = BinOp.getValueType();
19340   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
19341   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
19342     return SDValue();
19343 
19344   SDValue Index = Extract->getOperand(1);
19345   EVT SubVT = Extract->getValueType(0);
19346   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
19347     return SDValue();
19348 
19349   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
19350   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
19351 
19352   // TODO: We could handle the case where only 1 operand is being inserted by
19353   //       creating an extract of the other operand, but that requires checking
19354   //       number of uses and/or costs.
19355   if (!Sub0 || !Sub1)
19356     return SDValue();
19357 
19358   // We are inserting both operands of the wide binop only to extract back
19359   // to the narrow vector size. Eliminate all of the insert/extract:
19360   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
19361   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
19362                      BinOp->getFlags());
19363 }
19364 
19365 /// If we are extracting a subvector produced by a wide binary operator try
19366 /// to use a narrow binary operator and/or avoid concatenation and extraction.
19367 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
19368                                           bool LegalOperations) {
19369   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
19370   // some of these bailouts with other transforms.
19371 
19372   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
19373     return V;
19374 
19375   // The extract index must be a constant, so we can map it to a concat operand.
19376   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
19377   if (!ExtractIndexC)
19378     return SDValue();
19379 
19380   // We are looking for an optionally bitcasted wide vector binary operator
19381   // feeding an extract subvector.
19382   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19383   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
19384   unsigned BOpcode = BinOp.getOpcode();
19385   if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
19386     return SDValue();
19387 
19388   // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
19389   // reduced to the unary fneg when it is visited, and we probably want to deal
19390   // with fneg in a target-specific way.
19391   if (BOpcode == ISD::FSUB) {
19392     auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
19393     if (C && C->getValueAPF().isNegZero())
19394       return SDValue();
19395   }
19396 
19397   // The binop must be a vector type, so we can extract some fraction of it.
19398   EVT WideBVT = BinOp.getValueType();
19399   // The optimisations below currently assume we are dealing with fixed length
19400   // vectors. It is possible to add support for scalable vectors, but at the
19401   // moment we've done no analysis to prove whether they are profitable or not.
19402   if (!WideBVT.isFixedLengthVector())
19403     return SDValue();
19404 
19405   EVT VT = Extract->getValueType(0);
19406   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
19407   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
19408          "Extract index is not a multiple of the vector length.");
19409 
19410   // Bail out if this is not a proper multiple width extraction.
19411   unsigned WideWidth = WideBVT.getSizeInBits();
19412   unsigned NarrowWidth = VT.getSizeInBits();
19413   if (WideWidth % NarrowWidth != 0)
19414     return SDValue();
19415 
19416   // Bail out if we are extracting a fraction of a single operation. This can
19417   // occur because we potentially looked through a bitcast of the binop.
19418   unsigned NarrowingRatio = WideWidth / NarrowWidth;
19419   unsigned WideNumElts = WideBVT.getVectorNumElements();
19420   if (WideNumElts % NarrowingRatio != 0)
19421     return SDValue();
19422 
19423   // Bail out if the target does not support a narrower version of the binop.
19424   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
19425                                    WideNumElts / NarrowingRatio);
19426   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
19427     return SDValue();
19428 
19429   // If extraction is cheap, we don't need to look at the binop operands
19430   // for concat ops. The narrow binop alone makes this transform profitable.
19431   // We can't just reuse the original extract index operand because we may have
19432   // bitcasted.
19433   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
19434   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
19435   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
19436       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
19437     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
19438     SDLoc DL(Extract);
19439     SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
19440     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19441                             BinOp.getOperand(0), NewExtIndex);
19442     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19443                             BinOp.getOperand(1), NewExtIndex);
19444     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
19445                                       BinOp.getNode()->getFlags());
19446     return DAG.getBitcast(VT, NarrowBinOp);
19447   }
19448 
19449   // Only handle the case where we are doubling and then halving. A larger ratio
19450   // may require more than two narrow binops to replace the wide binop.
19451   if (NarrowingRatio != 2)
19452     return SDValue();
19453 
19454   // TODO: The motivating case for this transform is an x86 AVX1 target. That
19455   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
19456   // flavors, but no other 256-bit integer support. This could be extended to
19457   // handle any binop, but that may require fixing/adding other folds to avoid
19458   // codegen regressions.
19459   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
19460     return SDValue();
19461 
19462   // We need at least one concatenation operation of a binop operand to make
19463   // this transform worthwhile. The concat must double the input vector sizes.
19464   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
19465     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
19466       return V.getOperand(ConcatOpNum);
19467     return SDValue();
19468   };
19469   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
19470   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
19471 
19472   if (SubVecL || SubVecR) {
19473     // If a binop operand was not the result of a concat, we must extract a
19474     // half-sized operand for our new narrow binop:
19475     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
19476     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
19477     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
19478     SDLoc DL(Extract);
19479     SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
19480     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
19481                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19482                                       BinOp.getOperand(0), IndexC);
19483 
19484     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
19485                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19486                                       BinOp.getOperand(1), IndexC);
19487 
19488     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
19489     return DAG.getBitcast(VT, NarrowBinOp);
19490   }
19491 
19492   return SDValue();
19493 }
19494 
19495 /// If we are extracting a subvector from a wide vector load, convert to a
19496 /// narrow load to eliminate the extraction:
19497 /// (extract_subvector (load wide vector)) --> (load narrow vector)
19498 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
19499   // TODO: Add support for big-endian. The offset calculation must be adjusted.
19500   if (DAG.getDataLayout().isBigEndian())
19501     return SDValue();
19502 
19503   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
19504   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
19505   if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
19506       !ExtIdx)
19507     return SDValue();
19508 
19509   // Allow targets to opt-out.
19510   EVT VT = Extract->getValueType(0);
19511 
19512   // We can only create byte sized loads.
19513   if (!VT.isByteSized())
19514     return SDValue();
19515 
19516   unsigned Index = ExtIdx->getZExtValue();
19517   unsigned NumElts = VT.getVectorMinNumElements();
19518 
19519   // The definition of EXTRACT_SUBVECTOR states that the index must be a
19520   // multiple of the minimum number of elements in the result type.
19521   assert(Index % NumElts == 0 && "The extract subvector index is not a "
19522                                  "multiple of the result's element count");
19523 
19524   // It's fine to use TypeSize here as we know the offset will not be negative.
19525   TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
19526 
19527   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19528   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
19529     return SDValue();
19530 
19531   // The narrow load will be offset from the base address of the old load if
19532   // we are extracting from something besides index 0 (little-endian).
19533   SDLoc DL(Extract);
19534 
19535   // TODO: Use "BaseIndexOffset" to make this more effective.
19536   SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
19537 
19538   uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
19539   MachineFunction &MF = DAG.getMachineFunction();
19540   MachineMemOperand *MMO;
19541   if (Offset.isScalable()) {
19542     MachinePointerInfo MPI =
19543         MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
19544     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
19545   } else
19546     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
19547                                   StoreSize);
19548 
19549   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
19550   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
19551   return NewLd;
19552 }
19553 
19554 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
19555   EVT NVT = N->getValueType(0);
19556   SDValue V = N->getOperand(0);
19557   uint64_t ExtIdx = N->getConstantOperandVal(1);
19558 
19559   // Extract from UNDEF is UNDEF.
19560   if (V.isUndef())
19561     return DAG.getUNDEF(NVT);
19562 
19563   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
19564     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
19565       return NarrowLoad;
19566 
19567   // Combine an extract of an extract into a single extract_subvector.
19568   // ext (ext X, C), 0 --> ext X, C
19569   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
19570     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
19571                                     V.getConstantOperandVal(1)) &&
19572         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
19573       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
19574                          V.getOperand(1));
19575     }
19576   }
19577 
19578   // Try to move vector bitcast after extract_subv by scaling extraction index:
19579   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
19580   if (V.getOpcode() == ISD::BITCAST &&
19581       V.getOperand(0).getValueType().isVector()) {
19582     SDValue SrcOp = V.getOperand(0);
19583     EVT SrcVT = SrcOp.getValueType();
19584     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
19585     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
19586     if ((SrcNumElts % DestNumElts) == 0) {
19587       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
19588       ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
19589       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
19590                                       NewExtEC);
19591       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
19592         SDLoc DL(N);
19593         SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
19594         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
19595                                          V.getOperand(0), NewIndex);
19596         return DAG.getBitcast(NVT, NewExtract);
19597       }
19598     }
19599     if ((DestNumElts % SrcNumElts) == 0) {
19600       unsigned DestSrcRatio = DestNumElts / SrcNumElts;
19601       if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
19602         ElementCount NewExtEC =
19603             NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
19604         EVT ScalarVT = SrcVT.getScalarType();
19605         if ((ExtIdx % DestSrcRatio) == 0) {
19606           SDLoc DL(N);
19607           unsigned IndexValScaled = ExtIdx / DestSrcRatio;
19608           EVT NewExtVT =
19609               EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
19610           if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
19611             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
19612             SDValue NewExtract =
19613                 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
19614                             V.getOperand(0), NewIndex);
19615             return DAG.getBitcast(NVT, NewExtract);
19616           }
19617           if (NewExtEC.isScalar() &&
19618               TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
19619             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
19620             SDValue NewExtract =
19621                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
19622                             V.getOperand(0), NewIndex);
19623             return DAG.getBitcast(NVT, NewExtract);
19624           }
19625         }
19626       }
19627     }
19628   }
19629 
19630   if (V.getOpcode() == ISD::CONCAT_VECTORS) {
19631     unsigned ExtNumElts = NVT.getVectorMinNumElements();
19632     EVT ConcatSrcVT = V.getOperand(0).getValueType();
19633     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
19634            "Concat and extract subvector do not change element type");
19635     assert((ExtIdx % ExtNumElts) == 0 &&
19636            "Extract index is not a multiple of the input vector length.");
19637 
19638     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
19639     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
19640 
19641     // If the concatenated source types match this extract, it's a direct
19642     // simplification:
19643     // extract_subvec (concat V1, V2, ...), i --> Vi
19644     if (ConcatSrcNumElts == ExtNumElts)
19645       return V.getOperand(ConcatOpIdx);
19646 
19647     // If the concatenated source vectors are a multiple length of this extract,
19648     // then extract a fraction of one of those source vectors directly from a
19649     // concat operand. Example:
19650     //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
19651     //   v2i8 extract_subvec v8i8 Y, 6
19652     if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
19653       SDLoc DL(N);
19654       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
19655       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
19656              "Trying to extract from >1 concat operand?");
19657       assert(NewExtIdx % ExtNumElts == 0 &&
19658              "Extract index is not a multiple of the input vector length.");
19659       SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
19660       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
19661                          V.getOperand(ConcatOpIdx), NewIndexC);
19662     }
19663   }
19664 
19665   V = peekThroughBitcasts(V);
19666 
19667   // If the input is a build vector. Try to make a smaller build vector.
19668   if (V.getOpcode() == ISD::BUILD_VECTOR) {
19669     EVT InVT = V.getValueType();
19670     unsigned ExtractSize = NVT.getSizeInBits();
19671     unsigned EltSize = InVT.getScalarSizeInBits();
19672     // Only do this if we won't split any elements.
19673     if (ExtractSize % EltSize == 0) {
19674       unsigned NumElems = ExtractSize / EltSize;
19675       EVT EltVT = InVT.getVectorElementType();
19676       EVT ExtractVT =
19677           NumElems == 1 ? EltVT
19678                         : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
19679       if ((Level < AfterLegalizeDAG ||
19680            (NumElems == 1 ||
19681             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
19682           (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
19683         unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
19684 
19685         if (NumElems == 1) {
19686           SDValue Src = V->getOperand(IdxVal);
19687           if (EltVT != Src.getValueType())
19688             Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
19689           return DAG.getBitcast(NVT, Src);
19690         }
19691 
19692         // Extract the pieces from the original build_vector.
19693         SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
19694                                               V->ops().slice(IdxVal, NumElems));
19695         return DAG.getBitcast(NVT, BuildVec);
19696       }
19697     }
19698   }
19699 
19700   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
19701     // Handle only simple case where vector being inserted and vector
19702     // being extracted are of same size.
19703     EVT SmallVT = V.getOperand(1).getValueType();
19704     if (!NVT.bitsEq(SmallVT))
19705       return SDValue();
19706 
19707     // Combine:
19708     //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
19709     // Into:
19710     //    indices are equal or bit offsets are equal => V1
19711     //    otherwise => (extract_subvec V1, ExtIdx)
19712     uint64_t InsIdx = V.getConstantOperandVal(2);
19713     if (InsIdx * SmallVT.getScalarSizeInBits() ==
19714         ExtIdx * NVT.getScalarSizeInBits())
19715       return DAG.getBitcast(NVT, V.getOperand(1));
19716     return DAG.getNode(
19717         ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
19718         DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
19719         N->getOperand(1));
19720   }
19721 
19722   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
19723     return NarrowBOp;
19724 
19725   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
19726     return SDValue(N, 0);
19727 
19728   return SDValue();
19729 }
19730 
19731 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
19732 /// followed by concatenation. Narrow vector ops may have better performance
19733 /// than wide ops, and this can unlock further narrowing of other vector ops.
19734 /// Targets can invert this transform later if it is not profitable.
19735 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
19736                                          SelectionDAG &DAG) {
19737   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
19738   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
19739       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
19740       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
19741     return SDValue();
19742 
19743   // Split the wide shuffle mask into halves. Any mask element that is accessing
19744   // operand 1 is offset down to account for narrowing of the vectors.
19745   ArrayRef<int> Mask = Shuf->getMask();
19746   EVT VT = Shuf->getValueType(0);
19747   unsigned NumElts = VT.getVectorNumElements();
19748   unsigned HalfNumElts = NumElts / 2;
19749   SmallVector<int, 16> Mask0(HalfNumElts, -1);
19750   SmallVector<int, 16> Mask1(HalfNumElts, -1);
19751   for (unsigned i = 0; i != NumElts; ++i) {
19752     if (Mask[i] == -1)
19753       continue;
19754     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
19755     if (i < HalfNumElts)
19756       Mask0[i] = M;
19757     else
19758       Mask1[i - HalfNumElts] = M;
19759   }
19760 
19761   // Ask the target if this is a valid transform.
19762   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19763   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
19764                                 HalfNumElts);
19765   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
19766       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
19767     return SDValue();
19768 
19769   // shuffle (concat X, undef), (concat Y, undef), Mask -->
19770   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
19771   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
19772   SDLoc DL(Shuf);
19773   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
19774   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
19775   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
19776 }
19777 
19778 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
19779 // or turn a shuffle of a single concat into simpler shuffle then concat.
19780 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
19781   EVT VT = N->getValueType(0);
19782   unsigned NumElts = VT.getVectorNumElements();
19783 
19784   SDValue N0 = N->getOperand(0);
19785   SDValue N1 = N->getOperand(1);
19786   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
19787   ArrayRef<int> Mask = SVN->getMask();
19788 
19789   SmallVector<SDValue, 4> Ops;
19790   EVT ConcatVT = N0.getOperand(0).getValueType();
19791   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
19792   unsigned NumConcats = NumElts / NumElemsPerConcat;
19793 
19794   auto IsUndefMaskElt = [](int i) { return i == -1; };
19795 
19796   // Special case: shuffle(concat(A,B)) can be more efficiently represented
19797   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
19798   // half vector elements.
19799   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
19800       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
19801                    IsUndefMaskElt)) {
19802     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
19803                               N0.getOperand(1),
19804                               Mask.slice(0, NumElemsPerConcat));
19805     N1 = DAG.getUNDEF(ConcatVT);
19806     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
19807   }
19808 
19809   // Look at every vector that's inserted. We're looking for exact
19810   // subvector-sized copies from a concatenated vector
19811   for (unsigned I = 0; I != NumConcats; ++I) {
19812     unsigned Begin = I * NumElemsPerConcat;
19813     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
19814 
19815     // Make sure we're dealing with a copy.
19816     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
19817       Ops.push_back(DAG.getUNDEF(ConcatVT));
19818       continue;
19819     }
19820 
19821     int OpIdx = -1;
19822     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
19823       if (IsUndefMaskElt(SubMask[i]))
19824         continue;
19825       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
19826         return SDValue();
19827       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
19828       if (0 <= OpIdx && EltOpIdx != OpIdx)
19829         return SDValue();
19830       OpIdx = EltOpIdx;
19831     }
19832     assert(0 <= OpIdx && "Unknown concat_vectors op");
19833 
19834     if (OpIdx < (int)N0.getNumOperands())
19835       Ops.push_back(N0.getOperand(OpIdx));
19836     else
19837       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
19838   }
19839 
19840   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19841 }
19842 
19843 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
19844 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
19845 //
19846 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
19847 // a simplification in some sense, but it isn't appropriate in general: some
19848 // BUILD_VECTORs are substantially cheaper than others. The general case
19849 // of a BUILD_VECTOR requires inserting each element individually (or
19850 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
19851 // all constants is a single constant pool load.  A BUILD_VECTOR where each
19852 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
19853 // are undef lowers to a small number of element insertions.
19854 //
19855 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
19856 // We don't fold shuffles where one side is a non-zero constant, and we don't
19857 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
19858 // non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  // If the first source has other users, duplicating its scalars here is
  // unlikely to be a win.
  if (!N0->hasOneUse())
    return SDValue();

  // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    if (!N1->hasOneUse())
      return SDValue();

    bool N0AnyConst = isAnyConstantBuildVector(N0);
    bool N1AnyConst = isAnyConstantBuildVector(N1);
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // If both inputs are splats of the same value then we can safely merge this
  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
  bool IsSplat = false;
  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
  if (BV0 && BV1)
    if (SDValue Splat0 = BV0->getSplatValue())
      IsSplat = (Splat0 == BV1->getSplatValue());

  // Map every mask element to the scalar it selects: a BUILD_VECTOR operand,
  // the SCALAR_TO_VECTOR scalar (lane 0 only), or undef.
  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        SDValue Op0 = S.getOperand(0);
        // Only lane 0 of SCALAR_TO_VECTOR is defined; all others are undef.
        Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
    // generating a splat; semantically, this is fine, but it's likely to
    // generate low-quality code if the target can't reconstruct an appropriate
    // shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!IsSplat && !DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }

  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
19935 
19936 // Match shuffles that can be converted to any_vector_extend_in_reg.
19937 // This is often generated during legalization.
19938 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
19939 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
19940 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
19941                                             SelectionDAG &DAG,
19942                                             const TargetLowering &TLI,
19943                                             bool LegalOperations) {
19944   EVT VT = SVN->getValueType(0);
19945   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
19946 
19947   // TODO Add support for big-endian when we have a test case.
19948   if (!VT.isInteger() || IsBigEndian)
19949     return SDValue();
19950 
19951   unsigned NumElts = VT.getVectorNumElements();
19952   unsigned EltSizeInBits = VT.getScalarSizeInBits();
19953   ArrayRef<int> Mask = SVN->getMask();
19954   SDValue N0 = SVN->getOperand(0);
19955 
19956   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
19957   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
19958     for (unsigned i = 0; i != NumElts; ++i) {
19959       if (Mask[i] < 0)
19960         continue;
19961       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
19962         continue;
19963       return false;
19964     }
19965     return true;
19966   };
19967 
19968   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
19969   // power-of-2 extensions as they are the most likely.
19970   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
19971     // Check for non power of 2 vector sizes
19972     if (NumElts % Scale != 0)
19973       continue;
19974     if (!isAnyExtend(Scale))
19975       continue;
19976 
19977     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
19978     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
19979     // Never create an illegal type. Only create unsupported operations if we
19980     // are pre-legalization.
19981     if (TLI.isTypeLegal(OutVT))
19982       if (!LegalOperations ||
19983           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
19984         return DAG.getBitcast(VT,
19985                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
19986                                           SDLoc(SVN), OutVT, N0));
19987   }
19988 
19989   return SDValue();
19990 }
19991 
19992 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
19993 // each source element of a large type into the lowest elements of a smaller
19994 // destination type. This is often generated during legalization.
19995 // If the source node itself was a '*_extend_vector_inreg' node then we should
19996 // then be able to remove it.
19997 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
19998                                         SelectionDAG &DAG) {
19999   EVT VT = SVN->getValueType(0);
20000   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20001 
20002   // TODO Add support for big-endian when we have a test case.
20003   if (!VT.isInteger() || IsBigEndian)
20004     return SDValue();
20005 
20006   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
20007 
20008   unsigned Opcode = N0.getOpcode();
20009   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
20010       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
20011       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
20012     return SDValue();
20013 
20014   SDValue N00 = N0.getOperand(0);
20015   ArrayRef<int> Mask = SVN->getMask();
20016   unsigned NumElts = VT.getVectorNumElements();
20017   unsigned EltSizeInBits = VT.getScalarSizeInBits();
20018   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
20019   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
20020 
20021   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
20022     return SDValue();
20023   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
20024 
20025   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
20026   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
20027   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
20028   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
20029     for (unsigned i = 0; i != NumElts; ++i) {
20030       if (Mask[i] < 0)
20031         continue;
20032       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
20033         continue;
20034       return false;
20035     }
20036     return true;
20037   };
20038 
20039   // At the moment we just handle the case where we've truncated back to the
20040   // same size as before the extension.
20041   // TODO: handle more extension/truncation cases as cases arise.
20042   if (EltSizeInBits != ExtSrcSizeInBits)
20043     return SDValue();
20044 
20045   // We can remove *extend_vector_inreg only if the truncation happens at
20046   // the same scale as the extension.
20047   if (isTruncate(ExtScale))
20048     return DAG.getBitcast(VT, N00);
20049 
20050   return SDValue();
20051 }
20052 
20053 // Combine shuffles of splat-shuffles of the form:
20054 // shuffle (shuffle V, undef, splat-mask), undef, M
20055 // If splat-mask contains undef elements, we need to be careful about
20056 // introducing undef's in the folded mask which are not the result of composing
20057 // the masks of the shuffles.
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
                                        SelectionDAG &DAG) {
  // Only handle a unary user-shuffle of a splat-shuffle.
  if (!Shuf->getOperand(1).isUndef())
    return SDValue();
  auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
  if (!Splat || !Splat->isSplat())
    return SDValue();

  ArrayRef<int> ShufMask = Shuf->getMask();
  ArrayRef<int> SplatMask = Splat->getMask();
  assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");

  // Prefer simplifying to the splat-shuffle, if possible. This is legal if
  // every undef mask element in the splat-shuffle has a corresponding undef
  // element in the user-shuffle's mask or if the composition of mask elements
  // would result in undef.
  // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
  // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
  //   In this case it is not legal to simplify to the splat-shuffle because we
  //   may be exposing the users of the shuffle an undef element at index 1
  //   which was not there before the combine.
  // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
  //   In this case the composition of masks yields SplatMask, so it's ok to
  //   simplify to the splat-shuffle.
  // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
  //   In this case the composed mask includes all undef elements of SplatMask
  //   and in addition sets element zero to undef. It is safe to simplify to
  //   the splat-shuffle.
  auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
                                       ArrayRef<int> SplatMask) {
    // Reject only when a lane that is undef in the splat would become defined
    // for a user that previously saw a defined value there.
    for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
      if (UserMask[i] != -1 && SplatMask[i] == -1 &&
          SplatMask[UserMask[i]] != -1)
        return false;
    return true;
  };
  if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
    return Shuf->getOperand(0);

  // Create a new shuffle with a mask that is composed of the two shuffles'
  // masks.
  SmallVector<int, 32> NewMask;
  for (int Idx : ShufMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);

  return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
                              Splat->getOperand(0), Splat->getOperand(1),
                              NewMask);
}
20107 
20108 /// Combine shuffle of shuffle of the form:
20109 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
20110 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
20111                                      SelectionDAG &DAG) {
20112   if (!OuterShuf->getOperand(1).isUndef())
20113     return SDValue();
20114   auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
20115   if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
20116     return SDValue();
20117 
20118   ArrayRef<int> OuterMask = OuterShuf->getMask();
20119   ArrayRef<int> InnerMask = InnerShuf->getMask();
20120   unsigned NumElts = OuterMask.size();
20121   assert(NumElts == InnerMask.size() && "Mask length mismatch");
20122   SmallVector<int, 32> CombinedMask(NumElts, -1);
20123   int SplatIndex = -1;
20124   for (unsigned i = 0; i != NumElts; ++i) {
20125     // Undef lanes remain undef.
20126     int OuterMaskElt = OuterMask[i];
20127     if (OuterMaskElt == -1)
20128       continue;
20129 
20130     // Peek through the shuffle masks to get the underlying source element.
20131     int InnerMaskElt = InnerMask[OuterMaskElt];
20132     if (InnerMaskElt == -1)
20133       continue;
20134 
20135     // Initialize the splatted element.
20136     if (SplatIndex == -1)
20137       SplatIndex = InnerMaskElt;
20138 
20139     // Non-matching index - this is not a splat.
20140     if (SplatIndex != InnerMaskElt)
20141       return SDValue();
20142 
20143     CombinedMask[i] = InnerMaskElt;
20144   }
20145   assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
20146           getSplatIndex(CombinedMask) != -1) &&
20147          "Expected a splat mask");
20148 
20149   // TODO: The transform may be a win even if the mask is not legal.
20150   EVT VT = OuterShuf->getValueType(0);
20151   assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
20152   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
20153     return SDValue();
20154 
20155   return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
20156                               InnerShuf->getOperand(1), CombinedMask);
20157 }
20158 
20159 /// If the shuffle mask is taking exactly one element from the first vector
20160 /// operand and passing through all other elements from the second vector
20161 /// operand, return the index of the mask element that is choosing an element
20162 /// from the first operand. Otherwise, return -1.
20163 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
20164   int MaskSize = Mask.size();
20165   int EltFromOp0 = -1;
20166   // TODO: This does not match if there are undef elements in the shuffle mask.
20167   // Should we ignore undefs in the shuffle mask instead? The trade-off is
20168   // removing an instruction (a shuffle), but losing the knowledge that some
20169   // vector lanes are not needed.
20170   for (int i = 0; i != MaskSize; ++i) {
20171     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
20172       // We're looking for a shuffle of exactly one element from operand 0.
20173       if (EltFromOp0 != -1)
20174         return -1;
20175       EltFromOp0 = i;
20176     } else if (Mask[i] != i + MaskSize) {
20177       // Nothing from operand 1 can change lanes.
20178       return -1;
20179     }
20180   }
20181   return EltFromOp0;
20182 }
20183 
20184 /// If a shuffle inserts exactly one element from a source vector operand into
20185 /// another vector operand and we can access the specified element as a scalar,
20186 /// then we can eliminate the shuffle.
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
                                      SelectionDAG &DAG) {
  // First, check if we are taking one element of a vector and shuffling that
  // element into another vector.
  ArrayRef<int> Mask = Shuf->getMask();
  // Keep a mutable copy so we can commute it if the direct match fails.
  SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
  SDValue Op0 = Shuf->getOperand(0);
  SDValue Op1 = Shuf->getOperand(1);
  int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
  if (ShufOp0Index == -1) {
    // Commute mask and check again.
    ShuffleVectorSDNode::commuteMask(CommutedMask);
    ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
    if (ShufOp0Index == -1)
      return SDValue();
    // Commute operands to match the commuted shuffle mask.
    std::swap(Op0, Op1);
    // From here on, Mask refers to the commuted view so that Op0 is always
    // the operand contributing the single element.
    Mask = CommutedMask;
  }

  // The shuffle inserts exactly one element from operand 0 into operand 1.
  // Now see if we can access that element as a scalar via a real insert element
  // instruction.
  // TODO: We can try harder to locate the element as a scalar. Examples: it
  // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
  assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
         "Shuffle mask value must be from operand 0");
  if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
    return SDValue();

  // The insert must use a constant index equal to the mask element chosen
  // from operand 0; otherwise the inserted scalar is not the element that
  // the shuffle selects.
  auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
  if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
    return SDValue();

  // There's an existing insertelement with constant insertion index, so we
  // don't need to check the legality/profitability of a replacement operation
  // that differs at most in the constant value. The target should be able to
  // lower any of those in a similar way. If not, legalization will expand this
  // to a scalar-to-vector plus shuffle.
  //
  // Note that the shuffle may move the scalar from the position that the insert
  // element used. Therefore, our new insert element occurs at the shuffle's
  // mask index value, not the insert's index value.
  // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
  SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
                     Op1, Op0.getOperand(1), NewInsIndex);
}
20235 
20236 /// If we have a unary shuffle of a shuffle, see if it can be folded away
20237 /// completely. This has the potential to lose undef knowledge because the first
20238 /// shuffle may not have an undef mask element where the second one does. So
20239 /// only call this after doing simplifications based on demanded elements.
20240 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
20241   // shuf (shuf0 X, Y, Mask0), undef, Mask
20242   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20243   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
20244     return SDValue();
20245 
20246   ArrayRef<int> Mask = Shuf->getMask();
20247   ArrayRef<int> Mask0 = Shuf0->getMask();
20248   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
20249     // Ignore undef elements.
20250     if (Mask[i] == -1)
20251       continue;
20252     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
20253 
20254     // Is the element of the shuffle operand chosen by this shuffle the same as
20255     // the element chosen by the shuffle operand itself?
20256     if (Mask0[Mask[i]] != Mask0[i])
20257       return SDValue();
20258   }
20259   // Every element of this shuffle is identical to the result of the previous
20260   // shuffle, so we can replace this value.
20261   return Shuf->getOperand(0);
20262 }
20263 
// Main combine entry point for VECTOR_SHUFFLE nodes. Applies a sequence of
// canonicalizations first (undef operands, duplicated operands), then a series
// of folds; the order of the rules below is significant because later folds
// rely on the canonical forms established earlier.
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      // Remap second-operand indices onto the (identical) first operand.
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      // Any lane taken from the undef rhs becomes an undef lane.
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
    return InsElt;

  // A shuffle of a single vector that is a splatted value can always be folded.
  if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
    return V;

  if (SDValue V = formSplatFromShuffles(SVN, DAG))
    return V;

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    int SplatIndex = SVN->getSplatIndex();
    if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
        TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
      // splat (vector_bo L, R), Index -->
      // splat (scalar_bo (extelt L, Index), (extelt R, Index))
      SDValue L = N0.getOperand(0), R = N0.getOperand(1);
      SDLoc DL(N);
      EVT EltVT = VT.getScalarType();
      SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
      SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
      SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
      SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
                                  N0.getNode()->getFlags());
      // Re-splat the scalar result: insert it into lane 0, then broadcast
      // lane 0 to every element with an all-zeros shuffle mask.
      SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
      SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
      return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
    }

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    SDNode *V = N0.getNode();
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      // Find the first non-undef element to compare the rest against.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      SDValue Splatted = V->getOperand(SplatIndex);
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // Simplify source operands based on shuffle mask.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  // This is intentionally placed after demanded elements simplification because
  // it could eliminate knowledge of undef elements created by this shuffle.
  if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
    return ShufOp;

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // A shuffle of a concat of the same narrow vector can be reduced to use
  // only low-half elements of a concat with undef:
  // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
      N0.getNumOperands() == 2 &&
      N0.getOperand(0) == N0.getOperand(1)) {
    int HalfNumElts = (int)NumElts / 2;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      // Remap high-half indices onto the identical low half.
      if (Idx >= HalfNumElts) {
        assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
        Idx -= HalfNumElts;
      }
      NewMask.push_back(Idx);
    }
    if (TLI.isShuffleMaskLegal(NewMask, VT)) {
      SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
      SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
                                   N0.getOperand(0), UndefVec);
      return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
    }
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {

    SDValue BC0 = peekThroughOneUseBitcasts(N0);
    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      // Both element sizes must be whole multiples of the common scalar size
      // so each mask can be widened/narrowed exactly.
      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask;
        SmallVector<int, 8> OuterMask;
        narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
        narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    // NOTE: this returns SDValue() rather than falling through, so the folds
    // below this point are skipped for splat inner shuffles.
    if (OtherSV->isSplat())
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
    return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
  }

  if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
    return V;

  return SDValue();
}
20627 
// Combine entry point for SCALAR_TO_VECTOR nodes: tries to rewrite
// scalar_to_vector(extract_vector_elt(V, C0)) as a shuffle of V (possibly
// with a truncate or a subvector extract to fix up mismatched types).
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE and possible truncate.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      VT.isFixedLengthVector() &&
      InVal->getOperand(0).getValueType().isFixedLengthVector()) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);
    auto InVecT = InVec.getValueType();
    // Only handle a constant extract index; a variable index cannot be
    // expressed as a shuffle mask.
    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
      // Build a mask that moves the extracted element into lane 0 and leaves
      // all other lanes undef.
      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      NewMask[0] = Elt;
      // If we have an implicit truncate, do the truncate here as long as the
      // result scalar type is legal. If it is not legal, fall through and try
      // to form a shuffle instead.
      if (VT.getScalarType() != InVal.getValueType() &&
          InVal.getValueType().isScalarInteger() &&
          isTypeLegal(VT.getScalarType())) {
        SDValue Val =
            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
      }
      // The shuffle path requires matching scalar types and a source vector
      // at least as wide as the result.
      if (VT.getScalarType() == InVecT.getScalarType() &&
          VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
        SDValue LegalShuffle =
          TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
                                      DAG.getUNDEF(InVecT), NewMask, DAG);
        if (LegalShuffle) {
          // If the initial vector is the correct size this shuffle is a
          // valid result.
          if (VT == InVecT)
            return LegalShuffle;
          // If not we must truncate the vector.
          if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
            // Extract the leading VT-sized subvector of the shuffle result.
            SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
            EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
                                         InVecT.getVectorElementType(),
                                         VT.getVectorNumElements());
            return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
                               LegalShuffle, ZeroIdx);
          }
        }
      }
    }
  }

  return SDValue();
}
20679 
// Combine entry point for INSERT_SUBVECTOR nodes. Tries, in order: dropping
// no-op inserts, undoing extract/insert round trips, pulling bitcasts out,
// merging chained inserts, canonicalizing insert order, and folding an insert
// into an existing CONCAT_VECTORS.
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  // Insertion index (operand 2 is read as a constant here).
  uint64_t InsIdx = N->getConstantOperandVal(2);

  // If inserting an UNDEF, just return the original vector.
  if (N1.isUndef())
    return N0;

  // If this is an insert of an extracted vector into an undef vector, we can
  // just use the input to the extract.
  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
    return N1.getOperand(0);

  // If we are inserting a bitcast value into an undef, with the same
  // number of elements, just use the bitcast input of the extract.
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(0).getOperand(1) == N2 &&
      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
          VT.getVectorNumElements() &&
      N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
          VT.getSizeInBits()) {
    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
  }

  // If both N1 and N2 are bitcast values on which insert_subvector
  // would makes sense, pull the bitcast through.
  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
    SDValue CN0 = N0.getOperand(0);
    SDValue CN1 = N1.getOperand(0);
    EVT CN0VT = CN0.getValueType();
    EVT CN1VT = CN1.getValueType();
    // Both pre-bitcast values must be vectors with matching element types,
    // and the outer one must have the same element count as the result.
    if (CN0VT.isVector() && CN1VT.isVector() &&
        CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
        CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
                                      CN0.getValueType(), CN0, CN1, N2);
      return DAG.getBitcast(VT, NewINSERT);
    }
  }

  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N0.getOperand(1).getValueType() == N1.getValueType() &&
      N0.getOperand(2) == N2)
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
                       N1, N2);

  // Eliminate an intermediate insert into an undef vector:
  // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
  // insert_subvector undef, X, N2
  if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
                       N1.getOperand(1), N2);

  // Push subvector bitcasts to the output, adjusting the index as we go.
  // insert_subvector(bitcast(v), bitcast(s), c1)
  // -> bitcast(insert_subvector(v, s, c2))
  if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
      N1.getOpcode() == ISD::BITCAST) {
    SDValue N0Src = peekThroughBitcasts(N0);
    SDValue N1Src = peekThroughBitcasts(N1);
    EVT N0SrcSVT = N0Src.getValueType().getScalarType();
    EVT N1SrcSVT = N1Src.getValueType().getScalarType();
    if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
        N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
      EVT NewVT;
      SDLoc DL(N);
      SDValue NewIdx;
      LLVMContext &Ctx = *DAG.getContext();
      ElementCount NumElts = VT.getVectorElementCount();
      unsigned EltSizeInBits = VT.getScalarSizeInBits();
      // Scale the element count and insert index so the insert happens in the
      // source scalar type; only exact multiples are handled.
      if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
        unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
        NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
        NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
      } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
        unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
        if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
          NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
                                   NumElts.divideCoefficientBy(Scale));
          NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
        }
      }
      if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
        SDValue Res = DAG.getBitcast(NewVT, N0Src);
        Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
        return DAG.getBitcast(VT, Res);
      }
    }
  }

  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, Idx0), Idx1)
  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
      N1.getValueType() == N0.getOperand(1).getValueType()) {
    unsigned OtherIdx = N0.getConstantOperandVal(2);
    if (InsIdx < OtherIdx) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
                                  N0.getOperand(0), N1, N2);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
    }
  }

  // If the input vector is a concatenation, and the insert replaces
  // one of the pieces, we can optimize into a single concat_vectors.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
      N0.getOperand(0).getValueType() == N1.getValueType()) {
    // Each concat operand spans Factor elements; InsIdx / Factor picks the
    // piece being fully overwritten by N1.
    unsigned Factor = N1.getValueType().getVectorNumElements();
    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
    Ops[InsIdx / Factor] = N1;
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }

  // Simplify source operands based on insertion.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
20816 
20817 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
20818   SDValue N0 = N->getOperand(0);
20819 
20820   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
20821   if (N0->getOpcode() == ISD::FP16_TO_FP)
20822     return N0->getOperand(0);
20823 
20824   return SDValue();
20825 }
20826 
20827 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
20828   SDValue N0 = N->getOperand(0);
20829 
20830   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
20831   if (N0->getOpcode() == ISD::AND) {
20832     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
20833     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
20834       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
20835                          N0.getOperand(0));
20836     }
20837   }
20838 
20839   return SDValue();
20840 }
20841 
SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N0.getValueType();
  unsigned Opcode = N->getOpcode();

  // VECREDUCE over 1-element vector is just an extract.
  if (VT.getVectorNumElements() == 1) {
    SDLoc dl(N);
    SDValue Res =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
                    DAG.getVectorIdxConstant(0, dl));
    // The reduction's result type may be wider than the vector element type;
    // any-extend to match it in that case.
    if (Res.getValueType() != N->getValueType(0))
      Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
    return Res;
  }

  // On a boolean vector an and/or reduction is the same as a umin/umax
  // reduction. Convert them if the latter is legal while the former isn't.
  if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
    unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
        ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
    // The sign-bit check proves every element is all-zeros or all-ones, which
    // is what makes the and/or and umin/umax forms equivalent.
    if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
        TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
        DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
      return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
  }

  return SDValue();
}
20871 
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");

  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  // Look through bitcasts on the mask operand so constant build_vectors with a
  // different element width are still recognized.
  SDValue RHS = peekThroughBitcasts(N->getOperand(1));
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      // X & undef --> 0 (not undef). So this lane must be converted to choose
      // from the zero constant vector (same as if the element had all 0-bits).
      if (Elt.isUndef()) {
        Indices.push_back(i + NumSubElts);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      // On big-endian targets sub-elements are numbered from the high bits.
      if (DAG.getDataLayout().isBigEndian())
        Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
      else
        Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);

      // All-ones keeps the source lane (index i); all-zeros selects the
      // corresponding lane of the zero vector (index i + NumSubElts).
      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  // Try progressively finer splits until one produces a legal clear mask.
  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
20960 
/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT EltVT = VT.getVectorElementType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // TODO: Remove/replace the extract cost check? If the elements are available
  //       as scalars, then there may be no extract cost. Should we ask if
  //       inserting a scalar back into a vector is cheap instead?
  int Index0, Index1;
  SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
  SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
  // Both operands must splat the same lane of sources with a matching element
  // type, the extract must be cheap, and the target must handle the scalar op.
  if (!Src0 || !Src1 || Index0 != Index1 ||
      Src0.getValueType().getVectorElementType() != EltVT ||
      Src1.getValueType().getVectorElementType() != EltVT ||
      !TLI.isExtractVecEltCheap(VT, Index0) ||
      !TLI.isOperationLegalOrCustom(Opcode, EltVT))
    return SDValue();

  SDLoc DL(N);
  SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
  SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
  SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
  SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());

  // If all lanes but 1 are undefined, no need to splat the scalar result.
  // TODO: Keep track of undefs and use that info in the general case.
  if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
      count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
      count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
    // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
    // build_vec ..undef, (bo X, Y), undef...
    SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
    Ops[Index0] = ScalarBO;
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
  SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
  return DAG.getBuildVector(VT, DL, Ops);
}
21006 
/// Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         "SimplifyVBinOp only works on vectors!");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Ops[] = {LHS, RHS};
  EVT VT = N->getValueType(0);
  unsigned Opcode = N->getOpcode();
  SDNodeFlags Flags = N->getFlags();

  // See if we can constant fold the vector operation.
  if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
          Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
    return Fold;

  // Move unary shuffles with identical masks after a vector binop:
  // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
  //   --> shuffle (VBinOp A, B), Undef, Mask
  // This does not require type legality checks because we are creating the
  // same types of operations that are in the original sequence. We do have to
  // restrict ops like integer div that have immediate UB (eg, div-by-zero)
  // though. This code is adapted from the identical transform in instcombine.
  if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
      Opcode != ISD::UREM && Opcode != ISD::SREM &&
      Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
    auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
    auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
    // The one-use check avoids duplicating the binop; LHS == RHS is a special
    // case where only one shuffle survives regardless.
    if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
        LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
        (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
      SDLoc DL(N);
      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
                                     RHS.getOperand(0), Flags);
      SDValue UndefV = LHS.getOperand(1);
      return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
    }

    // Try to sink a splat shuffle after a binop with a uniform constant.
    // This is limited to cases where neither the shuffle nor the constant have
    // undefined elements because that could be poison-unsafe or inhibit
    // demanded elements analysis. It is further limited to not change a splat
    // of an inserted scalar because that may be optimized better by
    // load-folding or other target-specific behaviors.
    if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
        Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
        Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
      // binop (splat X), (splat C) --> splat (binop X, C)
      SDLoc DL(N);
      SDValue X = Shuf0->getOperand(0);
      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
      return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
                                  Shuf0->getMask());
    }
    // Mirror of the previous fold with the splat on the left-hand side.
    if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
        Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
        Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
      // binop (splat C), (splat X) --> splat (binop C, X)
      SDLoc DL(N);
      SDValue X = Shuf1->getOperand(0);
      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
      return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
                                  Shuf1->getMask());
    }
  }

  // The following pattern is likely to emerge with vector reduction ops. Moving
  // the binary operation ahead of insertion may allow using a narrower vector
  // instruction that has better performance than the wide version of the op:
  // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
  if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
      RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
      LHS.getOperand(2) == RHS.getOperand(2) &&
      (LHS.hasOneUse() || RHS.hasOneUse())) {
    SDValue X = LHS.getOperand(1);
    SDValue Y = RHS.getOperand(1);
    SDValue Z = LHS.getOperand(2);
    EVT NarrowVT = X.getValueType();
    if (NarrowVT == Y.getValueType() &&
        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
                                              LegalOperations)) {
      // (binop undef, undef) may not return undef, so compute that result.
      SDLoc DL(N);
      SDValue VecC =
          DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
      SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
      return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
    }
  }

  // Make sure all but the first op are undef or constant.
  auto ConcatWithConstantOrUndef = [](SDValue Concat) {
    return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
           std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
                     [](const SDValue &Op) {
                       return Op.isUndef() ||
                              ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
                     });
  };

  // The following pattern is likely to emerge with vector reduction ops. Moving
  // the binary operation ahead of the concat may allow using a narrower vector
  // instruction that has better performance than the wide version of the op:
  // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
  //   concat (VBinOp X, Y), VecC
  if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
      (LHS.hasOneUse() || RHS.hasOneUse())) {
    EVT NarrowVT = LHS.getOperand(0).getValueType();
    if (NarrowVT == RHS.getOperand(0).getValueType() &&
        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
      SDLoc DL(N);
      unsigned NumOperands = LHS.getNumOperands();
      SmallVector<SDValue, 4> ConcatOps;
      for (unsigned i = 0; i != NumOperands; ++i) {
        // This constant fold for operands 1 and up.
        ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
                                        RHS.getOperand(i)));
      }

      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    }
  }

  if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
    return V;

  return SDValue();
}
21136 
// Simplify a SELECT whose condition is a SETCC by delegating to
// SimplifySelectCC, then re-expand any returned SELECT_CC back into a
// SETCC + SELECT pair so the caller still gets a SELECT-shaped node.
SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
                                    SDValue N2) {
  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // If we got a simplified select_cc node back from SimplifySelectCC, then
  // break it down into a new SETCC node, and a new SELECT node, and then return
  // the SELECT node, since we were called with a SELECT node.
  if (SCC.getNode()) {
    // Check to see if we got a select_cc back (to turn into setcc/select).
    // Otherwise, just return whatever node we got back, like fabs.
    if (SCC.getOpcode() == ISD::SELECT_CC) {
      // Carry the original setcc's flags onto both replacement nodes.
      const SDNodeFlags Flags = N0.getNode()->getFlags();
      SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
                                  N0.getValueType(),
                                  SCC.getOperand(0), SCC.getOperand(1),
                                  SCC.getOperand(4), Flags);
      AddToWorklist(SETCC.getNode());
      SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
                                         SCC.getOperand(2), SCC.getOperand(3));
      SelectNode->setFlags(Flags);
      return SelectNode;
    }

    return SCC;
  }
  return SDValue();
}
21167 
/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
/// being selected between, see if we can simplify the select.  Callers of this
/// should assume that TheSelect is deleted if this returns true.  As such, they
/// should return the appropriate thing (e.g. the node) back to the top-level of
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {
  // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
  // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
    if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
      SDValue Sqrt = RHS;
      ISD::CondCode CC;
      SDValue CmpLHS;
      const ConstantFPSDNode *Zero = nullptr;

      // Pull the compare operands out of either select form.
      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
        CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
        CmpLHS = TheSelect->getOperand(0);
        Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
      } else {
        // SELECT or VSELECT
        SDValue Cmp = TheSelect->getOperand(0);
        if (Cmp.getOpcode() == ISD::SETCC) {
          CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
          CmpLHS = Cmp.getOperand(0);
          Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
        }
      }
      // The compared value must be the sqrt operand itself, compared
      // less-than against zero (any of the three *lt flavors).
      if (Zero && Zero->isZero() &&
          Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
          CC == ISD::SETULT || CC == ISD::SETLT)) {
        // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
        CombineTo(TheSelect, Sqrt);
        return true;
      }
    }
  }
  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        // Be conservative for atomics for the moment
        // TODO: This does appear to be legal for unordered atomics (see D66309)
        !LLD->isSimple() || !RLD->isSimple() ||
        // FIXME: If either is a pre/post inc/dec load,
        // we'd need to split out the address adjustment.
        LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information.  This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations.  Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0 ||
        // We can't produce a CMOV of a TargetFrameIndex since we won't
        // generate the address generation required.
        LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
        RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
                                      LLD->getBasePtr().getValueType()))
      return false;

    // The loads must not depend on one another.
    if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
      return false;

    // Check that the select condition doesn't reach either load.  If so,
    // folding this will induce a cycle into the DAG.  If not, this is safe to
    // xform, so create a select of the addresses.

    SmallPtrSet<const SDNode *, 32> Visited;
    SmallVector<const SDNode *, 16> Worklist;

    // Always fail if LLD and RLD are not independent. TheSelect is a
    // predecessor to all Nodes in question so we need not search past it.

    Visited.insert(TheSelect);
    Worklist.push_back(LLD);
    Worklist.push_back(RLD);

    if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
        SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
      return false;

    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      // We cannot do this optimization if any pair of {RLD, LLD} is a
      // predecessor to {RLD, LLD, CondNode}. As we've already compared the
      // Loads, we only need to check if CondNode is a successor to one of the
      // loads. We can further avoid this if there's no use of their chain
      // value.
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      Worklist.push_back(CondNode);

      if ((LLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
          (RLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
        return false;

      Addr = DAG.getSelect(SDLoc(TheSelect),
                           LLD->getBasePtr().getValueType(),
                           TheSelect->getOperand(0), LLD->getBasePtr(),
                           RLD->getBasePtr());
    } else {  // Otherwise SELECT_CC
      // We cannot do this optimization if any pair of {RLD, LLD} is a
      // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
      // the Loads, we only need to check if CondLHS/CondRHS is a successor to
      // one of the loads. We can further avoid this if there's no use of their
      // chain value.

      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();
      Worklist.push_back(CondLHS);
      Worklist.push_back(CondRHS);

      if ((LLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
          (RLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
        return false;

      Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must be the minimum (most restrictive) alignment of the
    // inputs.
    Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
    MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
    // The merged load may only keep a memory flag if both inputs have it.
    if (!RLD->isInvariant())
      MMOFlags &= ~MachineMemOperand::MOInvariant;
    if (!RLD->isDereferenceable())
      MMOFlags &= ~MachineMemOperand::MODereferenceable;
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      // FIXME: Discards pointer and AA info.
      Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
                         LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
                         MMOFlags);
    } else {
      // FIXME: Discards pointer and AA info.
      Load = DAG.getExtLoad(
          LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
                                                  : LLD->getExtensionType(),
          SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
          MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain.  We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}
21359 
21360 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
21361 /// bitwise 'and'.
21362 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
21363                                             SDValue N1, SDValue N2, SDValue N3,
21364                                             ISD::CondCode CC) {
21365   // If this is a select where the false operand is zero and the compare is a
21366   // check of the sign bit, see if we can perform the "gzip trick":
21367   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
21368   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
21369   EVT XType = N0.getValueType();
21370   EVT AType = N2.getValueType();
21371   if (!isNullConstant(N3) || !XType.bitsGE(AType))
21372     return SDValue();
21373 
21374   // If the comparison is testing for a positive value, we have to invert
21375   // the sign bit mask, so only do that transform if the target has a bitwise
21376   // 'and not' instruction (the invert is free).
21377   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
21378     // (X > -1) ? A : 0
21379     // (X >  0) ? X : 0 <-- This is canonical signed max.
21380     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
21381       return SDValue();
21382   } else if (CC == ISD::SETLT) {
21383     // (X <  0) ? A : 0
21384     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
21385     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
21386       return SDValue();
21387   } else {
21388     return SDValue();
21389   }
21390 
21391   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
21392   // constant.
21393   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
21394   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
21395   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
21396     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
21397     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
21398       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
21399       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
21400       AddToWorklist(Shift.getNode());
21401 
21402       if (XType.bitsGT(AType)) {
21403         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
21404         AddToWorklist(Shift.getNode());
21405       }
21406 
21407       if (CC == ISD::SETGT)
21408         Shift = DAG.getNOT(DL, Shift, AType);
21409 
21410       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
21411     }
21412   }
21413 
21414   unsigned ShCt = XType.getSizeInBits() - 1;
21415   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
21416     return SDValue();
21417 
21418   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
21419   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
21420   AddToWorklist(Shift.getNode());
21421 
21422   if (XType.bitsGT(AType)) {
21423     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
21424     AddToWorklist(Shift.getNode());
21425   }
21426 
21427   if (CC == ISD::SETGT)
21428     Shift = DAG.getNOT(DL, Shift, AType);
21429 
21430   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
21431 }
21432 
21433 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
21434 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
21435   SDValue N0 = N->getOperand(0);
21436   EVT VT = N->getValueType(0);
21437   bool IsFabs = N->getOpcode() == ISD::FABS;
21438   bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
21439 
21440   if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
21441     return SDValue();
21442 
21443   SDValue Int = N0.getOperand(0);
21444   EVT IntVT = Int.getValueType();
21445 
21446   // The operand to cast should be integer.
21447   if (!IntVT.isInteger() || IntVT.isVector())
21448     return SDValue();
21449 
21450   // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
21451   // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
21452   APInt SignMask;
21453   if (N0.getValueType().isVector()) {
21454     // For vector, create a sign mask (0x80...) or its inverse (for fabs,
21455     // 0x7f...) per element and splat it.
21456     SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
21457     if (IsFabs)
21458       SignMask = ~SignMask;
21459     SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
21460   } else {
21461     // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
21462     SignMask = APInt::getSignMask(IntVT.getSizeInBits());
21463     if (IsFabs)
21464       SignMask = ~SignMask;
21465   }
21466   SDLoc DL(N0);
21467   Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
21468                     DAG.getConstant(SignMask, DL, IntVT));
21469   AddToWorklist(Int.getNode());
21470   return DAG.getBitcast(VT, Int);
21471 }
21472 
/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
/// in it. This may be a win when the constant is not otherwise available
/// because it replaces two constant pool loads with one.
SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
    const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
    ISD::CondCode CC) {
  // Only proceed when the target says this trade is profitable.
  if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
    return SDValue();

  // If we are before legalize types, we want the other legalization to happen
  // first (for example, to avoid messing with soft float).
  auto *TV = dyn_cast<ConstantFPSDNode>(N2);
  auto *FV = dyn_cast<ConstantFPSDNode>(N3);
  EVT VT = N2.getValueType();
  if (!TV || !FV || !TLI.isTypeLegal(VT))
    return SDValue();

  // If a constant can be materialized without loads, this does not make sense.
  if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
      TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
      TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
    return SDValue();

  // If both constants have multiple uses, then we won't need to do an extra
  // load. The values are likely around in registers for other users.
  if (!TV->hasOneUse() && !FV->hasOneUse())
    return SDValue();

  // Element 0 holds the false value, element 1 the true value, so the select
  // below can compute the offset directly from the (0/1) setcc result.
  Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
                       const_cast<ConstantFP*>(TV->getConstantFPValue()) };
  Type *FPTy = Elts[0]->getType();
  const DataLayout &TD = DAG.getDataLayout();

  // Create a ConstantArray of the two constants.
  Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
  SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
                                      TD.getPrefTypeAlign(FPTy));
  Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();

  // Get offsets to the 0 and 1 elements of the array, so we can select between
  // them.
  SDValue Zero = DAG.getIntPtrConstant(0, DL);
  unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
  SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
  // Offset = (N0 cond N1) ? EltSize : 0, then load from CPIdx + Offset.
  SDValue Cond =
      DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
  AddToWorklist(Cond.getNode());
  SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
  AddToWorklist(CstOffset.getNode());
  CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
  AddToWorklist(CPIdx.getNode());
  return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
                     MachinePointerInfo::getConstantPool(
                         DAG.getMachineFunction()), Alignment);
}
21529 
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
/// Returns the simplified value, or an empty SDValue if no fold applies.
/// \p NotExtCompare, when set, forbids folds that would produce a
/// zero-extended compare result directly.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT CmpOpVT = N0.getValueType();
  EVT CmpResVT = getSetCCResultType(CmpOpVT);
  EVT VT = N2.getValueType();
  auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());

  // Determine if the condition we're dealing with is constant.
  if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
    AddToWorklist(SCC.getNode());
    if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
      // fold select_cc true, x, y -> x
      // fold select_cc false, x, y -> y
      return !(SCCC->isNullValue()) ? N2 : N3;
    }
  }

  // Try replacing a select of two FP constant loads with one load from a
  // two-element constant pool array plus a computed offset.
  if (SDValue V =
          convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
    return V;

  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
    return V;

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      unsigned ShCt = AndMask.getBitWidth() - 1;
      // The target may veto materializing the mask with shifts (e.g. when
      // shifts of this width are expensive).
      if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
        SDValue ShlAmt =
          DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                          getShiftAmountTy(AndLHS.getValueType()));
        SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

        // Now arithmetic right shift it all the way over, so the result is
        // either all-ones, or zero.
        SDValue ShrAmt =
          DAG.getConstant(ShCt, SDLoc(Shl),
                          getShiftAmountTy(Shl.getValueType()));
        SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

        return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
      }
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
  bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();

  // This only works when the setcc result is known to be exactly 0 or 1, so
  // that shifting it left yields exactly the power-of-2 constant or zero.
  if ((Fold || Swap) &&
      TLI.getBooleanContents(CmpOpVT) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {

    // Normalize to the "true value is the power of 2" form by inverting the
    // condition when needed.
    if (Swap) {
      CC = ISD::getSetCCInverse(CC, CmpOpVT);
      std::swap(N2C, N3C);
    }

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    SDValue Temp, SCC;
    // zext (setcc n0, n1)
    if (LegalTypes) {
      SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
      if (VT.bitsLT(SCC.getValueType()))
        Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
      else
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    } else {
      // Before type legalization we may freely use an i1 setcc result.
      SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
      Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    }

    AddToWorklist(SCC.getNode());
    AddToWorklist(Temp.getNode());

    if (N2C->isOne())
      return Temp;

    unsigned ShCt = N2C->getAPIntValue().logBase2();
    if (TLI.shouldAvoidTransformToShift(VT, ShCt))
      return SDValue();

    // shl setcc result by log2 n2c
    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
                       DAG.getConstant(ShCt, SDLoc(Temp),
                                       getShiftAmountTy(Temp.getValueType())));
  }

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of E, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  return SDValue();
}
21679 
21680 /// This is a stub for TargetLowering::SimplifySetCC.
21681 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
21682                                    ISD::CondCode Cond, const SDLoc &DL,
21683                                    bool foldBooleans) {
21684   TargetLowering::DAGCombinerInfo
21685     DagCombineInfo(DAG, Level, false, this);
21686   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
21687 }
21688 
21689 /// Given an ISD::SDIV node expressing a divide by constant, return
21690 /// a DAG expression to select that will generate the same value by multiplying
21691 /// by a magic number.
21692 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
21693 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
21694   // when optimising for minimum size, we don't want to expand a div to a mul
21695   // and a shift.
21696   if (DAG.getMachineFunction().getFunction().hasMinSize())
21697     return SDValue();
21698 
21699   SmallVector<SDNode *, 8> Built;
21700   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
21701     for (SDNode *N : Built)
21702       AddToWorklist(N);
21703     return S;
21704   }
21705 
21706   return SDValue();
21707 }
21708 
21709 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
21710 /// DAG expression that will generate the same value by right shifting.
21711 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
21712   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
21713   if (!C)
21714     return SDValue();
21715 
21716   // Avoid division by zero.
21717   if (C->isNullValue())
21718     return SDValue();
21719 
21720   SmallVector<SDNode *, 8> Built;
21721   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
21722     for (SDNode *N : Built)
21723       AddToWorklist(N);
21724     return S;
21725   }
21726 
21727   return SDValue();
21728 }
21729 
21730 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
21731 /// expression that will generate the same value by multiplying by a magic
21732 /// number.
21733 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
21734 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
21735   // when optimising for minimum size, we don't want to expand a div to a mul
21736   // and a shift.
21737   if (DAG.getMachineFunction().getFunction().hasMinSize())
21738     return SDValue();
21739 
21740   SmallVector<SDNode *, 8> Built;
21741   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
21742     for (SDNode *N : Built)
21743       AddToWorklist(N);
21744     return S;
21745   }
21746 
21747   return SDValue();
21748 }
21749 
21750 /// Determines the LogBase2 value for a non-null input value using the
21751 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
21752 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
21753   EVT VT = V.getValueType();
21754   unsigned EltBits = VT.getScalarSizeInBits();
21755   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
21756   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
21757   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
21758   return LogBase2;
21759 }
21760 
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
///   F(X) = A X - 1 [which has a zero at X = 1/A]
///     =>
///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
///     does not require additional intermediate precision]
/// For the last iteration, put numerator N into it to gain more precision:
///   Result = N X_i + X_i (N - N A X_i)
/// \p N is the numerator and \p Op the divisor. Returns an estimate of N/Op,
/// or an empty SDValue if estimates are disabled or unavailable here.
SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
                                      SDNodeFlags Flags) {
  // Don't introduce this expansion once the DAG has been fully legalized.
  if (LegalDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getDivRefinementSteps(VT, MF);
  if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
    AddToWorklist(Est.getNode());

    SDLoc DL(Op);
    if (Iterations) {
      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);

      // Newton iterations: Est = Est + Est (N - Arg * Est)
      // If this is the last iteration, also multiply by the numerator.
      for (int i = 0; i < Iterations; ++i) {
        SDValue MulEst = Est;

        // Fold the multiply by N into the final refinement step: on the last
        // iteration we compute N*Est and N - Op*(N*Est) instead of Est and
        // 1 - Op*Est, which yields N/Op directly with extra precision.
        if (i == Iterations - 1) {
          MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
          AddToWorklist(MulEst.getNode());
        }

        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FSUB, DL, VT,
                             (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
        AddToWorklist(Est.getNode());
      }
    } else {
      // If no iterations are available, multiply with N.
      Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
      AddToWorklist(Est.getNode());
    }

    return Est;
  }

  return SDValue();
}
21829 
21830 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
21831 /// For the reciprocal sqrt, we need to find the zero of the function:
21832 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
21833 ///     =>
21834 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
21835 /// As a result, we precompute A/2 prior to the iteration loop.
21836 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
21837                                          unsigned Iterations,
21838                                          SDNodeFlags Flags, bool Reciprocal) {
21839   EVT VT = Arg.getValueType();
21840   SDLoc DL(Arg);
21841   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
21842 
21843   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
21844   // this entire sequence requires only one FP constant.
21845   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
21846   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
21847 
21848   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
21849   for (unsigned i = 0; i < Iterations; ++i) {
21850     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
21851     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
21852     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
21853     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
21854   }
21855 
21856   // If non-reciprocal square root is requested, multiply the result by Arg.
21857   if (!Reciprocal)
21858     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
21859 
21860   return Est;
21861 }
21862 
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
/// Unlike buildSqrtNROneConst, this form uses two FP constants (-0.5, -3.0).
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);

  // This routine must enter the loop below to work correctly
  // when (Reciprocal == false).
  assert(Iterations > 0);

  // Newton iterations for reciprocal square root:
  // E = (E * -0.5) * ((A * E) * E + -3.0)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);

    // When calculating a square root at the last iteration build:
    // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (notice a common subexpression)
    SDValue LHS;
    if (Reciprocal || (i + 1) < Iterations) {
      // RSQRT: LHS = (E * -0.5)
      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
    } else {
      // SQRT: LHS = (A * E) * -0.5
      // Reusing AE here folds the final multiply-by-Arg into the last
      // refinement step, so sqrt needs no extra node after the loop.
      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
    }

    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
  }

  return Est;
}
21904 
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
/// Returns the refined estimate, or an empty SDValue if estimates are
/// disabled or unavailable for this type.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                           bool Reciprocal) {
  // Don't introduce this expansion once the DAG has been fully legalized.
  if (LegalDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);

  // The target chooses between the one-constant and two-constant
  // Newton-Raphson refinement forms via UseOneConstNR.
  bool UseOneConstNR = false;
  if (SDValue Est =
      TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
                          Reciprocal)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      Est = UseOneConstNR
            ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
            : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);

      if (!Reciprocal) {
        // The estimate is now completely wrong if the input was exactly 0.0 or
        // possibly a denormal. Force the answer to 0.0 for those cases.
        SDLoc DL(Op);
        EVT CCVT = getSetCCResultType(VT);
        ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
        DenormalMode DenormMode = DAG.getDenormalMode(VT);
        if (DenormMode.Input == DenormalMode::IEEE) {
          // This is specifically a check for the handling of denormal inputs,
          // not the result.

          // fabs(X) < SmallestNormal ? 0.0 : Est
          const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
          APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
          SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
          SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
          Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
        } else {
          // Denormal inputs are flushed, so only exact zero needs the fixup:
          // X == 0.0 ? 0.0 : Est
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
          Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
        }
      }
    }
    return Est;
  }

  return SDValue();
}
21971 
21972 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
21973   return buildSqrtEstimateImpl(Op, Flags, true);
21974 }
21975 
21976 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
21977   return buildSqrtEstimateImpl(Op, Flags, false);
21978 }
21979 
21980 /// Return true if there is any possibility that the two addresses overlap.
21981 bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
21982 
21983   struct MemUseCharacteristics {
21984     bool IsVolatile;
21985     bool IsAtomic;
21986     SDValue BasePtr;
21987     int64_t Offset;
21988     Optional<int64_t> NumBytes;
21989     MachineMemOperand *MMO;
21990   };
21991 
21992   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
21993     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
21994       int64_t Offset = 0;
21995       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
21996         Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
21997                      ? C->getSExtValue()
21998                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
21999                            ? -1 * C->getSExtValue()
22000                            : 0;
22001       uint64_t Size =
22002           MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
22003       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
22004               Offset /*base offset*/,
22005               Optional<int64_t>(Size),
22006               LSN->getMemOperand()};
22007     }
22008     if (const auto *LN = cast<LifetimeSDNode>(N))
22009       return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
22010               (LN->hasOffset()) ? LN->getOffset() : 0,
22011               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
22012                                 : Optional<int64_t>(),
22013               (MachineMemOperand *)nullptr};
22014     // Default.
22015     return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
22016             (int64_t)0 /*offset*/,
22017             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
22018   };
22019 
22020   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
22021                         MUC1 = getCharacteristics(Op1);
22022 
22023   // If they are to the same address, then they must be aliases.
22024   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
22025       MUC0.Offset == MUC1.Offset)
22026     return true;
22027 
22028   // If they are both volatile then they cannot be reordered.
22029   if (MUC0.IsVolatile && MUC1.IsVolatile)
22030     return true;
22031 
22032   // Be conservative about atomics for the moment
22033   // TODO: This is way overconservative for unordered atomics (see D66309)
22034   if (MUC0.IsAtomic && MUC1.IsAtomic)
22035     return true;
22036 
22037   if (MUC0.MMO && MUC1.MMO) {
22038     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
22039         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
22040       return false;
22041   }
22042 
22043   // Try to prove that there is aliasing, or that there is no aliasing. Either
22044   // way, we can return now. If nothing can be proved, proceed with more tests.
22045   bool IsAlias;
22046   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
22047                                        DAG, IsAlias))
22048     return IsAlias;
22049 
22050   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
22051   // either are not known.
22052   if (!MUC0.MMO || !MUC1.MMO)
22053     return true;
22054 
22055   // If one operation reads from invariant memory, and the other may store, they
22056   // cannot alias. These should really be checking the equivalent of mayWrite,
22057   // but it only matters for memory nodes other than load /store.
22058   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
22059       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
22060     return false;
22061 
22062   // If we know required SrcValue1 and SrcValue2 have relatively large
22063   // alignment compared to the size and offset of the access, we may be able
22064   // to prove they do not alias. This check is conservative for now to catch
22065   // cases created by splitting vector types, it only works when the offsets are
22066   // multiples of the size of the data.
22067   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
22068   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
22069   Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
22070   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
22071   auto &Size0 = MUC0.NumBytes;
22072   auto &Size1 = MUC1.NumBytes;
22073   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
22074       Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
22075       OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
22076       SrcValOffset1 % *Size1 == 0) {
22077     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
22078     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
22079 
22080     // There is no overlap between these relatively aligned accesses of
22081     // similar size. Return no alias.
22082     if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
22083       return false;
22084   }
22085 
22086   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
22087                    ? CombinerGlobalAA
22088                    : DAG.getSubtarget().useAA();
22089 #ifndef NDEBUG
22090   if (CombinerAAOnlyFunc.getNumOccurrences() &&
22091       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
22092     UseAA = false;
22093 #endif
22094 
22095   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
22096       Size0.hasValue() && Size1.hasValue()) {
22097     // Use alias analysis information.
22098     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
22099     int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
22100     int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
22101     AliasResult AAResult = AA->alias(
22102         MemoryLocation(MUC0.MMO->getValue(), Overlap0,
22103                        UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
22104         MemoryLocation(MUC1.MMO->getValue(), Overlap1,
22105                        UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
22106     if (AAResult == NoAlias)
22107       return false;
22108   }
22109 
22110   // Otherwise we have to assume they alias.
22111   return true;
22112 }
22113 
22114 /// Walk up chain skipping non-aliasing memory nodes,
22115 /// looking for aliasing nodes and adding them to the Aliases vector.
22116 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
22117                                    SmallVectorImpl<SDValue> &Aliases) {
22118   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
22119   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
22120 
22121   // Get alias information for node.
22122   // TODO: relax aliasing for unordered atomics (see D66309)
22123   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
22124 
22125   // Starting off.
22126   Chains.push_back(OriginalChain);
22127   unsigned Depth = 0;
22128 
22129   // Attempt to improve chain by a single step
22130   std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
22131     switch (C.getOpcode()) {
22132     case ISD::EntryToken:
22133       // No need to mark EntryToken.
22134       C = SDValue();
22135       return true;
22136     case ISD::LOAD:
22137     case ISD::STORE: {
22138       // Get alias information for C.
22139       // TODO: Relax aliasing for unordered atomics (see D66309)
22140       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
22141                       cast<LSBaseSDNode>(C.getNode())->isSimple();
22142       if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
22143         // Look further up the chain.
22144         C = C.getOperand(0);
22145         return true;
22146       }
22147       // Alias, so stop here.
22148       return false;
22149     }
22150 
22151     case ISD::CopyFromReg:
22152       // Always forward past past CopyFromReg.
22153       C = C.getOperand(0);
22154       return true;
22155 
22156     case ISD::LIFETIME_START:
22157     case ISD::LIFETIME_END: {
22158       // We can forward past any lifetime start/end that can be proven not to
22159       // alias the memory access.
22160       if (!isAlias(N, C.getNode())) {
22161         // Look further up the chain.
22162         C = C.getOperand(0);
22163         return true;
22164       }
22165       return false;
22166     }
22167     default:
22168       return false;
22169     }
22170   };
22171 
22172   // Look at each chain and determine if it is an alias.  If so, add it to the
22173   // aliases list.  If not, then continue up the chain looking for the next
22174   // candidate.
22175   while (!Chains.empty()) {
22176     SDValue Chain = Chains.pop_back_val();
22177 
22178     // Don't bother if we've seen Chain before.
22179     if (!Visited.insert(Chain.getNode()).second)
22180       continue;
22181 
22182     // For TokenFactor nodes, look at each operand and only continue up the
22183     // chain until we reach the depth limit.
22184     //
22185     // FIXME: The depth check could be made to return the last non-aliasing
22186     // chain we found before we hit a tokenfactor rather than the original
22187     // chain.
22188     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
22189       Aliases.clear();
22190       Aliases.push_back(OriginalChain);
22191       return;
22192     }
22193 
22194     if (Chain.getOpcode() == ISD::TokenFactor) {
22195       // We have to check each of the operands of the token factor for "small"
22196       // token factors, so we queue them up.  Adding the operands to the queue
22197       // (stack) in reverse order maintains the original order and increases the
22198       // likelihood that getNode will find a matching token factor (CSE.)
22199       if (Chain.getNumOperands() > 16) {
22200         Aliases.push_back(Chain);
22201         continue;
22202       }
22203       for (unsigned n = Chain.getNumOperands(); n;)
22204         Chains.push_back(Chain.getOperand(--n));
22205       ++Depth;
22206       continue;
22207     }
22208     // Everything else
22209     if (ImproveChain(Chain)) {
22210       // Updated Chain Found, Consider new chain if one exists.
22211       if (Chain.getNode())
22212         Chains.push_back(Chain);
22213       ++Depth;
22214       continue;
22215     }
22216     // No Improved Chain Possible, treat as Alias.
22217     Aliases.push_back(Chain);
22218   }
22219 }
22220 
22221 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
22222 /// (aliasing node.)
22223 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
22224   if (OptLevel == CodeGenOpt::None)
22225     return OldChain;
22226 
22227   // Ops for replacing token factor.
22228   SmallVector<SDValue, 8> Aliases;
22229 
22230   // Accumulate all the aliases to this node.
22231   GatherAllAliases(N, OldChain, Aliases);
22232 
22233   // If no operands then chain to entry token.
22234   if (Aliases.size() == 0)
22235     return DAG.getEntryNode();
22236 
22237   // If a single operand then chain to it.  We don't need to revisit it.
22238   if (Aliases.size() == 1)
22239     return Aliases[0];
22240 
22241   // Construct a custom tailored token factor.
22242   return DAG.getTokenFactor(SDLoc(N), Aliases);
22243 }
22244 
namespace {
// TODO: Replace with std::monostate when we move to C++17.
// Empty unit type used as the IntervalMap payload below; all values compare
// equal, which allows adjacent intervals to be coalesced.
struct UnitT { } Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
} // namespace
22251 
22252 // This function tries to collect a bunch of potentially interesting
22253 // nodes to improve the chains of, all at once. This might seem
22254 // redundant, as this function gets called when visiting every store
22255 // node, so why not let the work be done on each store as it's visited?
22256 //
22257 // I believe this is mainly important because mergeConsecutiveStores
22258 // is unable to deal with merging stores of different sizes, so unless
22259 // we improve the chains of all the potential candidates up-front
22260 // before running mergeConsecutiveStores, it might only see some of
22261 // the nodes that will eventually be candidates, and then not be able
22262 // to go from a partially-merged state to the desired final
22263 // fully-merged state.
22264 
/// Try to decouple a chain of consecutive, non-overlapping simple stores
/// ending at \p St so that each store depends on the best available chain
/// instead of serially on the previous store. Returns true if the DAG was
/// changed.
bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
  SmallVector<StoreSDNode *, 8> ChainedStores;
  StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, every store writes immediately before the previously
  // visited store's range, and so is merged with the existing interval at
  // insertion time.

  using IMap =
      llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
  IMap::Allocator A;
  IMap Intervals(A);

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // BaseIndexOffset assumes that offsets are fixed-size, which
  // is not valid for scalable vectors where the offsets are
  // scaled by `vscale`, so bail out early.
  if (St->getMemoryVT().isScalableVector())
    return false;

  // Add ST's interval (size in bits rounded up to whole bytes).
  Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);

  // Walk up the chain collecting stores that provably write disjoint ranges
  // relative to the same base pointer.
  while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (!SDValue(Chain, 0)->hasOneUse())
      break;
    // TODO: Relax for unordered atomics (see D66309)
    if (!Chain->isSimple() || Chain->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
    // Check that the base pointer is the same as the original one.
    int64_t Offset;
    if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
      break;
    int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
    // Make sure we don't overlap with other intervals by checking the ones to
    // the left or right before inserting.
    auto I = Intervals.find(Offset);
    // If there's a next interval, we should end before it.
    if (I != Intervals.end() && I.start() < (Offset + Length))
      break;
    // If there's a previous interval, we should start after it.
    if (I != Intervals.begin() && (--I).stop() <= Offset)
      break;
    Intervals.insert(Offset, Offset + Length, Unit);

    ChainedStores.push_back(Chain);
    STChain = Chain;
  }

  // If we didn't find a chained store, exit.
  if (ChainedStores.size() == 0)
    return false;

  // Improve all chained stores (St and ChainedStores members) starting from
  // where the store chain ended and return single TokenFactor.
  SDValue NewChain = STChain->getChain();
  SmallVector<SDValue, 8> TFOps;
  // Iterate in reverse push order, i.e. starting with the store furthest up
  // the original chain. Only operand 0 (the chain) of each store is replaced;
  // all other operands are kept as-is.
  for (unsigned I = ChainedStores.size(); I;) {
    StoreSDNode *S = ChainedStores[--I];
    SDValue BetterChain = FindBetterChain(S, NewChain);
    S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
        S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
    TFOps.push_back(SDValue(S, 0));
    ChainedStores[I] = S;
  }

  // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
  SDValue BetterChain = FindBetterChain(St, NewChain);
  SDValue NewST;
  if (St->isTruncatingStore())
    NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
                              St->getBasePtr(), St->getMemoryVT(),
                              St->getMemOperand());
  else
    NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
                         St->getBasePtr(), St->getMemOperand());

  TFOps.push_back(NewST);

  // If we improved every element of TFOps, then we've lost the dependence on
  // NewChain to successors of St and we need to add it back to TFOps. Do so at
  // the beginning to keep relative order consistent with FindBetterChains.
  auto hasImprovedChain = [&](SDValue ST) -> bool {
    return ST->getOperand(0) != NewChain;
  };
  bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
  if (AddNewChain)
    TFOps.insert(TFOps.begin(), NewChain);

  // Replace St with a TokenFactor over all the rewritten stores.
  SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
  CombineTo(St, TF);

  // Add TF and its operands to the worklist.
  AddToWorklist(TF.getNode());
  for (const SDValue &Op : TF->ops())
    AddToWorklist(Op.getNode());
  AddToWorklist(STChain);
  return true;
}
22378 
22379 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
22380   if (OptLevel == CodeGenOpt::None)
22381     return false;
22382 
22383   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
22384 
22385   // We must have a base and an offset.
22386   if (!BasePtr.getBase().getNode())
22387     return false;
22388 
22389   // Do not handle stores to undef base pointers.
22390   if (BasePtr.getBase().isUndef())
22391     return false;
22392 
22393   // Directly improve a chain of disjoint stores starting at St.
22394   if (parallelizeChainedStores(St))
22395     return true;
22396 
22397   // Improve St's Chain..
22398   SDValue BetterChain = FindBetterChain(St, St->getChain());
22399   if (St->getChain() != BetterChain) {
22400     replaceStoreChain(St, BetterChain);
22401     return true;
22402   }
22403   return false;
22404 }
22405 
22406 /// This is the entry point for the file.
22407 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
22408                            CodeGenOpt::Level OptLevel) {
22409   /// This is the main entry point to this class.
22410   DAGCombiner(*this, AA, OptLevel).Run(Level);
22411 }
22412