1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Analysis/AliasAnalysis.h"
32 #include "llvm/Analysis/MemoryLocation.h"
33 #include "llvm/Analysis/VectorUtils.h"
34 #include "llvm/CodeGen/DAGCombine.h"
35 #include "llvm/CodeGen/ISDOpcodes.h"
36 #include "llvm/CodeGen/MachineFrameInfo.h"
37 #include "llvm/CodeGen/MachineFunction.h"
38 #include "llvm/CodeGen/MachineMemOperand.h"
39 #include "llvm/CodeGen/RuntimeLibcalls.h"
40 #include "llvm/CodeGen/SelectionDAG.h"
41 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
42 #include "llvm/CodeGen/SelectionDAGNodes.h"
43 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
44 #include "llvm/CodeGen/TargetLowering.h"
45 #include "llvm/CodeGen/TargetRegisterInfo.h"
46 #include "llvm/CodeGen/TargetSubtargetInfo.h"
47 #include "llvm/CodeGen/ValueTypes.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/ErrorHandling.h"
61 #include "llvm/Support/KnownBits.h"
62 #include "llvm/Support/MachineValueType.h"
63 #include "llvm/Support/MathExtras.h"
64 #include "llvm/Support/raw_ostream.h"
65 #include "llvm/Target/TargetMachine.h"
66 #include "llvm/Target/TargetOptions.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 #define DEBUG_TYPE "dagcombine"
79 
80 STATISTIC(NodesCombined   , "Number of dag nodes combined");
81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
84 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
85 STATISTIC(SlicedLoads, "Number of load sliced");
86 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87 
88 static cl::opt<bool>
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
91 
92 static cl::opt<bool>
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94         cl::desc("Enable DAG combiner's use of TBAA"));
95 
96 #ifndef NDEBUG
97 static cl::opt<std::string>
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99                    cl::desc("Only use DAG-combiner alias analysis in this"
100                             " function"));
101 #endif
102 
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
105 static cl::opt<bool>
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107                   cl::desc("Bypass the profitability model of load slicing"),
108                   cl::init(false));
109 
110 static cl::opt<bool>
111   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112                     cl::desc("DAG combiner may split indexing from loads"));
113 
114 static cl::opt<bool>
115     EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
116                        cl::desc("DAG combiner enable merging multiple stores "
117                                 "into a wider store"));
118 
119 static cl::opt<unsigned> TokenFactorInlineLimit(
120     "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
121     cl::desc("Limit the number of operands to inline for Token Factors"));
122 
123 static cl::opt<unsigned> StoreMergeDependenceLimit(
124     "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
125     cl::desc("Limit the number of times for the same StoreNode and RootNode "
126              "to bail out in store merging dependence check"));
127 
128 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
129     "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
130     cl::desc("DAG cominber enable reducing the width of load/op/store "
131              "sequence"));
132 
133 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
134     "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
135     cl::desc("DAG cominber enable load/<replace bytes>/store with "
136              "a narrower store"));
137 
138 namespace {
139 
140   class DAGCombiner {
141     SelectionDAG &DAG;
142     const TargetLowering &TLI;
143     const SelectionDAGTargetInfo *STI;
144     CombineLevel Level;
145     CodeGenOpt::Level OptLevel;
146     bool LegalDAG = false;
147     bool LegalOperations = false;
148     bool LegalTypes = false;
149     bool ForCodeSize;
150     bool DisableGenericCombines;
151 
152     /// Worklist of all of the nodes that need to be simplified.
153     ///
154     /// This must behave as a stack -- new nodes to process are pushed onto the
155     /// back and when processing we pop off of the back.
156     ///
157     /// The worklist will not contain duplicates but may contain null entries
158     /// due to nodes being deleted from the underlying DAG.
159     SmallVector<SDNode *, 64> Worklist;
160 
161     /// Mapping from an SDNode to its position on the worklist.
162     ///
163     /// This is used to find and remove nodes from the worklist (by nulling
164     /// them) when they are deleted from the underlying DAG. It relies on
165     /// stable indices of nodes within the worklist.
166     DenseMap<SDNode *, unsigned> WorklistMap;
    /// Records every node we have attempted to add to the worklist since we
    /// last considered a worklist entry. Because duplicate nodes are never
    /// added to the worklist, this can differ from the tail of the worklist.
170     SmallSetVector<SDNode *, 32> PruningList;
171 
172     /// Set of nodes which have been combined (at least once).
173     ///
174     /// This is used to allow us to reliably add any operands of a DAG node
175     /// which have not yet been combined to the worklist.
176     SmallPtrSet<SDNode *, 32> CombinedNodes;
177 
178     /// Map from candidate StoreNode to the pair of RootNode and count.
179     /// The count is used to track how many times we have seen the StoreNode
180     /// with the same RootNode bail out in dependence check. If we have seen
181     /// the bail out for the same pair many times over a limit, we won't
182     /// consider the StoreNode with the same RootNode as store merging
183     /// candidate again.
184     DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
185 
186     // AA - Used for DAG load/store alias analysis.
187     AliasAnalysis *AA;
188 
189     /// When an instruction is simplified, add all users of the instruction to
190     /// the work lists because they might get more simplified now.
191     void AddUsersToWorklist(SDNode *N) {
192       for (SDNode *Node : N->uses())
193         AddToWorklist(Node);
194     }
195 
196     /// Convenient shorthand to add a node and all of its user to the worklist.
197     void AddToWorklistWithUsers(SDNode *N) {
198       AddUsersToWorklist(N);
199       AddToWorklist(N);
200     }
201 
202     // Prune potentially dangling nodes. This is called after
203     // any visit to a node, but should also be called during a visit after any
204     // failed combine which may have created a DAG node.
205     void clearAddedDanglingWorklistEntries() {
206       // Check any nodes added to the worklist to see if they are prunable.
207       while (!PruningList.empty()) {
208         auto *N = PruningList.pop_back_val();
209         if (N->use_empty())
210           recursivelyDeleteUnusedNodes(N);
211       }
212     }
213 
214     SDNode *getNextWorklistEntry() {
215       // Before we do any work, remove nodes that are not in use.
216       clearAddedDanglingWorklistEntries();
217       SDNode *N = nullptr;
218       // The Worklist holds the SDNodes in order, but it may contain null
219       // entries.
220       while (!N && !Worklist.empty()) {
221         N = Worklist.pop_back_val();
222       }
223 
224       if (N) {
225         bool GoodWorklistEntry = WorklistMap.erase(N);
226         (void)GoodWorklistEntry;
227         assert(GoodWorklistEntry &&
228                "Found a worklist entry without a corresponding map entry!");
229       }
230       return N;
231     }
232 
233     /// Call the node-specific routine that folds each particular type of node.
234     SDValue visit(SDNode *N);
235 
236   public:
237     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
238         : DAG(D), TLI(D.getTargetLoweringInfo()),
239           STI(D.getSubtarget().getSelectionDAGInfo()),
240           Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
241       ForCodeSize = DAG.shouldOptForSize();
242       DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
243 
244       MaximumLegalStoreInBits = 0;
245       // We use the minimum store size here, since that's all we can guarantee
246       // for the scalable vector types.
247       for (MVT VT : MVT::all_valuetypes())
248         if (EVT(VT).isSimple() && VT != MVT::Other &&
249             TLI.isTypeLegal(EVT(VT)) &&
250             VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
251           MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
252     }
253 
    /// Record N as a pruning candidate: if it is dead the next time
    /// clearAddedDanglingWorklistEntries() runs, it will be deleted.
    void ConsiderForPruning(SDNode *N) {
      // Mark this for potential pruning.
      PruningList.insert(N);
    }
258 
259     /// Add to the worklist making sure its instance is at the back (next to be
260     /// processed.)
261     void AddToWorklist(SDNode *N) {
262       assert(N->getOpcode() != ISD::DELETED_NODE &&
263              "Deleted Node added to Worklist");
264 
265       // Skip handle nodes as they can't usefully be combined and confuse the
266       // zero-use deletion strategy.
267       if (N->getOpcode() == ISD::HANDLENODE)
268         return;
269 
270       ConsiderForPruning(N);
271 
272       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
273         Worklist.push_back(N);
274     }
275 
276     /// Remove all instances of N from the worklist.
277     void removeFromWorklist(SDNode *N) {
278       CombinedNodes.erase(N);
279       PruningList.remove(N);
280       StoreRootCountMap.erase(N);
281 
282       auto It = WorklistMap.find(N);
283       if (It == WorklistMap.end())
284         return; // Not in the worklist.
285 
286       // Null out the entry rather than erasing it to avoid a linear operation.
287       Worklist[It->second] = nullptr;
288       WorklistMap.erase(It);
289     }
290 
291     void deleteAndRecombine(SDNode *N);
292     bool recursivelyDeleteUnusedNodes(SDNode *N);
293 
294     /// Replaces all uses of the results of one DAG node with new values.
295     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
296                       bool AddTo = true);
297 
298     /// Replaces all uses of the results of one DAG node with new values.
299     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
300       return CombineTo(N, &Res, 1, AddTo);
301     }
302 
303     /// Replaces all uses of the results of one DAG node with new values.
304     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
305                       bool AddTo = true) {
306       SDValue To[] = { Res0, Res1 };
307       return CombineTo(N, To, 2, AddTo);
308     }
309 
310     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
311 
312   private:
313     unsigned MaximumLegalStoreInBits;
314 
315     /// Check the specified integer node value to see if it can be simplified or
316     /// if things it uses can be simplified by bit propagation.
317     /// If so, return true.
318     bool SimplifyDemandedBits(SDValue Op) {
319       unsigned BitWidth = Op.getScalarValueSizeInBits();
320       APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
321       return SimplifyDemandedBits(Op, DemandedBits);
322     }
323 
324     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
325       TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
326       KnownBits Known;
327       if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
328         return false;
329 
330       // Revisit the node.
331       AddToWorklist(Op.getNode());
332 
333       CommitTargetLoweringOpt(TLO);
334       return true;
335     }
336 
337     /// Check the specified vector node value to see if it can be simplified or
338     /// if things it uses can be simplified as it only uses some of the
339     /// elements. If so, return true.
340     bool SimplifyDemandedVectorElts(SDValue Op) {
341       // TODO: For now just pretend it cannot be simplified.
342       if (Op.getValueType().isScalableVector())
343         return false;
344 
345       unsigned NumElts = Op.getValueType().getVectorNumElements();
346       APInt DemandedElts = APInt::getAllOnesValue(NumElts);
347       return SimplifyDemandedVectorElts(Op, DemandedElts);
348     }
349 
350     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
351                               const APInt &DemandedElts,
352                               bool AssumeSingleUse = false);
353     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
354                                     bool AssumeSingleUse = false);
355 
356     bool CombineToPreIndexedLoadStore(SDNode *N);
357     bool CombineToPostIndexedLoadStore(SDNode *N);
358     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
359     bool SliceUpLoad(SDNode *N);
360 
361     // Scalars have size 0 to distinguish from singleton vectors.
362     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
363     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
364     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
365 
366     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
367     ///   load.
368     ///
369     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
370     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
371     /// \param EltNo index of the vector element to load.
372     /// \param OriginalLoad load that EVE came from to be replaced.
373     /// \returns EVE on success SDValue() on failure.
374     SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
375                                          SDValue EltNo,
376                                          LoadSDNode *OriginalLoad);
377     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
378     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
379     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
380     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
381     SDValue PromoteIntBinOp(SDValue Op);
382     SDValue PromoteIntShiftOp(SDValue Op);
383     SDValue PromoteExtend(SDValue Op);
384     bool PromoteLoad(SDValue Op);
385 
386     /// Call the node-specific routine that knows how to fold each
387     /// particular type of node. If that doesn't do anything, try the
388     /// target-specific DAG combines.
389     SDValue combine(SDNode *N);
390 
391     // Visitation implementation - Implement dag node combining for different
392     // node types.  The semantics are as follows:
393     // Return Value:
394     //   SDValue.getNode() == 0 - No change was made
395     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
396     //   otherwise              - N should be replaced by the returned Operand.
397     //
398     SDValue visitTokenFactor(SDNode *N);
399     SDValue visitMERGE_VALUES(SDNode *N);
400     SDValue visitADD(SDNode *N);
401     SDValue visitADDLike(SDNode *N);
402     SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
403     SDValue visitSUB(SDNode *N);
404     SDValue visitADDSAT(SDNode *N);
405     SDValue visitSUBSAT(SDNode *N);
406     SDValue visitADDC(SDNode *N);
407     SDValue visitADDO(SDNode *N);
408     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
409     SDValue visitSUBC(SDNode *N);
410     SDValue visitSUBO(SDNode *N);
411     SDValue visitADDE(SDNode *N);
412     SDValue visitADDCARRY(SDNode *N);
413     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
414     SDValue visitSUBE(SDNode *N);
415     SDValue visitSUBCARRY(SDNode *N);
416     SDValue visitMUL(SDNode *N);
417     SDValue visitMULFIX(SDNode *N);
418     SDValue useDivRem(SDNode *N);
419     SDValue visitSDIV(SDNode *N);
420     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
421     SDValue visitUDIV(SDNode *N);
422     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
423     SDValue visitREM(SDNode *N);
424     SDValue visitMULHU(SDNode *N);
425     SDValue visitMULHS(SDNode *N);
426     SDValue visitSMUL_LOHI(SDNode *N);
427     SDValue visitUMUL_LOHI(SDNode *N);
428     SDValue visitMULO(SDNode *N);
429     SDValue visitIMINMAX(SDNode *N);
430     SDValue visitAND(SDNode *N);
431     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
432     SDValue visitOR(SDNode *N);
433     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
434     SDValue visitXOR(SDNode *N);
435     SDValue SimplifyVBinOp(SDNode *N);
436     SDValue visitSHL(SDNode *N);
437     SDValue visitSRA(SDNode *N);
438     SDValue visitSRL(SDNode *N);
439     SDValue visitFunnelShift(SDNode *N);
440     SDValue visitRotate(SDNode *N);
441     SDValue visitABS(SDNode *N);
442     SDValue visitBSWAP(SDNode *N);
443     SDValue visitBITREVERSE(SDNode *N);
444     SDValue visitCTLZ(SDNode *N);
445     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
446     SDValue visitCTTZ(SDNode *N);
447     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
448     SDValue visitCTPOP(SDNode *N);
449     SDValue visitSELECT(SDNode *N);
450     SDValue visitVSELECT(SDNode *N);
451     SDValue visitSELECT_CC(SDNode *N);
452     SDValue visitSETCC(SDNode *N);
453     SDValue visitSETCCCARRY(SDNode *N);
454     SDValue visitSIGN_EXTEND(SDNode *N);
455     SDValue visitZERO_EXTEND(SDNode *N);
456     SDValue visitANY_EXTEND(SDNode *N);
457     SDValue visitAssertExt(SDNode *N);
458     SDValue visitAssertAlign(SDNode *N);
459     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
460     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
461     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
462     SDValue visitTRUNCATE(SDNode *N);
463     SDValue visitBITCAST(SDNode *N);
464     SDValue visitFREEZE(SDNode *N);
465     SDValue visitBUILD_PAIR(SDNode *N);
466     SDValue visitFADD(SDNode *N);
467     SDValue visitFSUB(SDNode *N);
468     SDValue visitFMUL(SDNode *N);
469     SDValue visitFMA(SDNode *N);
470     SDValue visitFDIV(SDNode *N);
471     SDValue visitFREM(SDNode *N);
472     SDValue visitFSQRT(SDNode *N);
473     SDValue visitFCOPYSIGN(SDNode *N);
474     SDValue visitFPOW(SDNode *N);
475     SDValue visitSINT_TO_FP(SDNode *N);
476     SDValue visitUINT_TO_FP(SDNode *N);
477     SDValue visitFP_TO_SINT(SDNode *N);
478     SDValue visitFP_TO_UINT(SDNode *N);
479     SDValue visitFP_ROUND(SDNode *N);
480     SDValue visitFP_EXTEND(SDNode *N);
481     SDValue visitFNEG(SDNode *N);
482     SDValue visitFABS(SDNode *N);
483     SDValue visitFCEIL(SDNode *N);
484     SDValue visitFTRUNC(SDNode *N);
485     SDValue visitFFLOOR(SDNode *N);
486     SDValue visitFMINNUM(SDNode *N);
487     SDValue visitFMAXNUM(SDNode *N);
488     SDValue visitFMINIMUM(SDNode *N);
489     SDValue visitFMAXIMUM(SDNode *N);
490     SDValue visitBRCOND(SDNode *N);
491     SDValue visitBR_CC(SDNode *N);
492     SDValue visitLOAD(SDNode *N);
493 
494     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
495     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
496 
497     SDValue visitSTORE(SDNode *N);
498     SDValue visitLIFETIME_END(SDNode *N);
499     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
500     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
501     SDValue visitBUILD_VECTOR(SDNode *N);
502     SDValue visitCONCAT_VECTORS(SDNode *N);
503     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
504     SDValue visitVECTOR_SHUFFLE(SDNode *N);
505     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
506     SDValue visitINSERT_SUBVECTOR(SDNode *N);
507     SDValue visitMLOAD(SDNode *N);
508     SDValue visitMSTORE(SDNode *N);
509     SDValue visitMGATHER(SDNode *N);
510     SDValue visitMSCATTER(SDNode *N);
511     SDValue visitFP_TO_FP16(SDNode *N);
512     SDValue visitFP16_TO_FP(SDNode *N);
513     SDValue visitVECREDUCE(SDNode *N);
514 
515     SDValue visitFADDForFMACombine(SDNode *N);
516     SDValue visitFSUBForFMACombine(SDNode *N);
517     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
518 
519     SDValue XformToShuffleWithZero(SDNode *N);
520     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
521                                                     const SDLoc &DL, SDValue N0,
522                                                     SDValue N1);
523     SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
524                                       SDValue N1);
525     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
526                            SDValue N1, SDNodeFlags Flags);
527 
528     SDValue visitShiftByConstant(SDNode *N);
529 
530     SDValue foldSelectOfConstants(SDNode *N);
531     SDValue foldVSelectOfConstants(SDNode *N);
532     SDValue foldBinOpIntoSelect(SDNode *BO);
533     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
534     SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
535     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
536     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
537                              SDValue N2, SDValue N3, ISD::CondCode CC,
538                              bool NotExtCompare = false);
539     SDValue convertSelectOfFPConstantsToLoadOffset(
540         const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
541         ISD::CondCode CC);
542     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
543                                    SDValue N2, SDValue N3, ISD::CondCode CC);
544     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
545                               const SDLoc &DL);
546     SDValue unfoldMaskedMerge(SDNode *N);
547     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
548     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
549                           const SDLoc &DL, bool foldBooleans);
550     SDValue rebuildSetCC(SDValue N);
551 
552     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
553                            SDValue &CC, bool MatchStrict = false) const;
554     bool isOneUseSetCC(SDValue N) const;
555 
556     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
557                                          unsigned HiOp);
558     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
559     SDValue CombineExtLoad(SDNode *N);
560     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
561     SDValue combineRepeatedFPDivisors(SDNode *N);
562     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
563     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
564     SDValue BuildSDIV(SDNode *N);
565     SDValue BuildSDIVPow2(SDNode *N);
566     SDValue BuildUDIV(SDNode *N);
567     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
568     SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
569     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
570     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
571     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
572     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
573                                 SDNodeFlags Flags, bool Reciprocal);
574     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
575                                 SDNodeFlags Flags, bool Reciprocal);
576     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
577                                bool DemandHighBits = true);
578     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
579     SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
580                               SDValue InnerPos, SDValue InnerNeg,
581                               unsigned PosOpcode, unsigned NegOpcode,
582                               const SDLoc &DL);
583     SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
584                               SDValue InnerPos, SDValue InnerNeg,
585                               unsigned PosOpcode, unsigned NegOpcode,
586                               const SDLoc &DL);
587     SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
588     SDValue MatchLoadCombine(SDNode *N);
589     SDValue MatchStoreCombine(StoreSDNode *N);
590     SDValue ReduceLoadWidth(SDNode *N);
591     SDValue ReduceLoadOpStoreWidth(SDNode *N);
592     SDValue splitMergedValStore(StoreSDNode *ST);
593     SDValue TransformFPLoadStorePair(SDNode *N);
594     SDValue convertBuildVecZextToZext(SDNode *N);
595     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
596     SDValue reduceBuildVecTruncToBitCast(SDNode *N);
597     SDValue reduceBuildVecToShuffle(SDNode *N);
598     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
599                                   ArrayRef<int> VectorMask, SDValue VecIn1,
600                                   SDValue VecIn2, unsigned LeftIdx,
601                                   bool DidSplitVec);
602     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
603 
604     /// Walk up chain skipping non-aliasing memory nodes,
605     /// looking for aliasing nodes and adding them to the Aliases vector.
606     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
607                           SmallVectorImpl<SDValue> &Aliases);
608 
609     /// Return true if there is any possibility that the two addresses overlap.
610     bool isAlias(SDNode *Op0, SDNode *Op1) const;
611 
612     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
613     /// chain (aliasing node.)
614     SDValue FindBetterChain(SDNode *N, SDValue Chain);
615 
616     /// Try to replace a store and any possibly adjacent stores on
617     /// consecutive chains with better chains. Return true only if St is
618     /// replaced.
619     ///
620     /// Notice that other chains may still be replaced even if the function
621     /// returns false.
622     bool findBetterNeighborChains(StoreSDNode *St);
623 
624     // Helper for findBetterNeighborChains. Walk up store chain add additional
625     // chained stores that do not overlap and can be parallelized.
626     bool parallelizeChainedStores(StoreSDNode *St);
627 
    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // The load/store node this link describes.
      LSBaseSDNode *MemNode;

      // Byte offset of this memory operation from the common base pointer of
      // the sequence.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };
640 
641     // Classify the origin of a stored value.
642     enum class StoreSource { Unknown, Constant, Extract, Load };
643     StoreSource getStoreSource(SDValue StoreVal) {
644       if (isa<ConstantSDNode>(StoreVal) || isa<ConstantFPSDNode>(StoreVal))
645         return StoreSource::Constant;
646       if (StoreVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
647           StoreVal.getOpcode() == ISD::EXTRACT_SUBVECTOR)
648         return StoreSource::Extract;
649       if (isa<LoadSDNode>(StoreVal))
650         return StoreSource::Load;
651       return StoreSource::Unknown;
652     }
653 
654     /// This is a helper function for visitMUL to check the profitability
655     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
656     /// MulNode is the original multiply, AddNode is (add x, c1),
657     /// and ConstNode is c2.
658     bool isMulAddWithConstProfitable(SDNode *MulNode,
659                                      SDValue &AddNode,
660                                      SDValue &ConstNode);
661 
662     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
663     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
664     /// the type of the loaded value to be extended.
665     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
666                           EVT LoadResultTy, EVT &ExtVT);
667 
668     /// Helper function to calculate whether the given Load/Store can have its
669     /// width reduced to ExtVT.
670     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
671                            EVT &MemVT, unsigned ShAmt = 0);
672 
673     /// Used by BackwardsPropagateMask to find suitable loads.
674     bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
675                            SmallPtrSetImpl<SDNode*> &NodesWithConsts,
676                            ConstantSDNode *Mask, SDNode *&NodeToMask);
677     /// Attempt to propagate a given AND node back to load leaves so that they
678     /// can be combined into narrow loads.
679     bool BackwardsPropagateMask(SDNode *N);
680 
681     /// Helper function for mergeConsecutiveStores which merges the component
682     /// store chains.
683     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
684                                 unsigned NumStores);
685 
686     /// This is a helper function for mergeConsecutiveStores. When the source
687     /// elements of the consecutive stores are all constants or all extracted
688     /// vector elements, try to merge them into one larger store introducing
689     /// bitcasts if necessary.  \return True if a merged store was created.
690     bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
691                                          EVT MemVT, unsigned NumStores,
692                                          bool IsConstantSrc, bool UseVector,
693                                          bool UseTrunc);
694 
695     /// This is a helper function for mergeConsecutiveStores. Stores that
696     /// potentially may be merged with St are placed in StoreNodes. RootNode is
697     /// a chain predecessor to all store candidates.
698     void getStoreMergeCandidates(StoreSDNode *St,
699                                  SmallVectorImpl<MemOpLink> &StoreNodes,
700                                  SDNode *&Root);
701 
702     /// Helper function for mergeConsecutiveStores. Checks if candidate stores
703     /// have indirect dependency through their operands. RootNode is the
704     /// predecessor to all stores calculated by getStoreMergeCandidates and is
705     /// used to prune the dependency check. \return True if safe to merge.
706     bool checkMergeStoreCandidatesForDependencies(
707         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
708         SDNode *RootNode);
709 
710     /// This is a helper function for mergeConsecutiveStores. Given a list of
711     /// store candidates, find the first N that are consecutive in memory.
712     /// Returns 0 if there are not at least 2 consecutive stores to try merging.
713     unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
714                                   int64_t ElementSizeBytes) const;
715 
716     /// This is a helper function for mergeConsecutiveStores. It is used for
717     /// store chains that are composed entirely of constant values.
718     bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
719                                   unsigned NumConsecutiveStores,
720                                   EVT MemVT, SDNode *Root, bool AllowVectors);
721 
722     /// This is a helper function for mergeConsecutiveStores. It is used for
723     /// store chains that are composed entirely of extracted vector elements.
724     /// When extracting multiple vector elements, try to store them in one
725     /// vector store rather than a sequence of scalar stores.
726     bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
727                                  unsigned NumConsecutiveStores, EVT MemVT,
728                                  SDNode *Root);
729 
730     /// This is a helper function for mergeConsecutiveStores. It is used for
731     /// store chains that are composed entirely of loaded values.
732     bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
733                               unsigned NumConsecutiveStores, EVT MemVT,
734                               SDNode *Root, bool AllowVectors,
735                               bool IsNonTemporalStore, bool IsNonTemporalLoad);
736 
737     /// Merge consecutive store operations into a wide store.
738     /// This optimization uses wide integers or vectors when possible.
739     /// \return true if stores were merged.
740     bool mergeConsecutiveStores(StoreSDNode *St);
741 
742     /// Try to transform a truncation where C is a constant:
743     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
744     ///
745     /// \p N needs to be a truncation and its first operand an AND. Other
746     /// requirements are checked by the function (e.g. that trunc is
747     /// single-use) and if missed an empty SDValue is returned.
748     SDValue distributeTruncateThroughAnd(SDNode *N);
749 
750     /// Helper function to determine whether the target supports operation
751     /// given by \p Opcode for type \p VT, that is, whether the operation
752     /// is legal or custom before legalizing operations, and whether is
753     /// legal (but not custom) after legalization.
754     bool hasOperation(unsigned Opcode, EVT VT) {
755       if (LegalOperations)
756         return TLI.isOperationLegal(Opcode, VT);
757       return TLI.isOperationLegalOrCustom(Opcode, VT);
758     }
759 
760   public:
761     /// Runs the dag combiner on all nodes in the work list
762     void Run(CombineLevel AtLevel);
763 
    /// Accessor for the SelectionDAG this combiner operates on.
    SelectionDAG &getDAG() const { return DAG; }
765 
    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge. Delegates to the target so the chosen
    /// type matches what instruction selection expects.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }
772 
773     /// This method returns true if we are running before type legalization or
774     /// if the specified VT is legal.
775     bool isTypeLegal(const EVT &VT) {
776       if (!LegalTypes) return true;
777       return TLI.isTypeLegal(VT);
778     }
779 
    /// Convenience wrapper around TargetLowering::getSetCCResultType, filling
    /// in this DAG's data layout and LLVMContext.
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }
784 
785     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
786                          SDValue OrigLoad, SDValue ExtLoad,
787                          ISD::NodeType ExtType);
788   };
789 
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  /// Drop the deleted node \p N from the combiner worklist so we never pop a
  /// dangling pointer. \p E (the replacement node, if any) is unused here.
  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};
803 
/// DAGUpdateListener that tells the combiner about every node newly created
/// in the DAG so it can be considered for pruning.
class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  //        compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};
815 
816 } // end anonymous namespace
817 
818 //===----------------------------------------------------------------------===//
819 //  TargetLowering::DAGCombinerInfo implementation
820 //===----------------------------------------------------------------------===//
821 
822 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
823   ((DAGCombiner*)DC)->AddToWorklist(N);
824 }
825 
826 SDValue TargetLowering::DAGCombinerInfo::
827 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
828   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
829 }
830 
831 SDValue TargetLowering::DAGCombinerInfo::
832 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
833   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
834 }
835 
836 SDValue TargetLowering::DAGCombinerInfo::
837 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
838   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
839 }
840 
841 bool TargetLowering::DAGCombinerInfo::
842 recursivelyDeleteUnusedNodes(SDNode *N) {
843   return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
844 }
845 
846 void TargetLowering::DAGCombinerInfo::
847 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
848   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
849 }
850 
851 //===----------------------------------------------------------------------===//
852 // Helper Functions
853 //===----------------------------------------------------------------------===//
854 
855 void DAGCombiner::deleteAndRecombine(SDNode *N) {
856   removeFromWorklist(N);
857 
858   // If the operands of this node are only used by the node, they will now be
859   // dead. Make sure to re-visit them and recursively delete dead nodes.
860   for (const SDValue &Op : N->ops())
861     // For an operand generating multiple values, one of the values may
862     // become dead allowing further simplification (e.g. split index
863     // arithmetic from an indexed load).
864     if (Op->hasOneUse() || Op->getNumValues() > 1)
865       AddToWorklist(Op.getNode());
866 
867   DAG.DeleteNode(N);
868 }
869 
870 // APInts must be the same size for most operations, this helper
871 // function zero extends the shorter of the pair so that they match.
872 // We provide an Offset so that we can create bitwidths that won't overflow.
873 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
874   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
875   LHS = LHS.zextOrSelf(Bits);
876   RHS = RHS.zextOrSelf(Bits);
877 }
878 
879 // Return true if this node is a setcc, or is a select_cc
880 // that selects between the target values used for true and false, making it
881 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
882 // the appropriate nodes based on the type of node we are checking. This
883 // simplifies life a bit for the callers.
884 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
885                                     SDValue &CC, bool MatchStrict) const {
886   if (N.getOpcode() == ISD::SETCC) {
887     LHS = N.getOperand(0);
888     RHS = N.getOperand(1);
889     CC  = N.getOperand(2);
890     return true;
891   }
892 
893   if (MatchStrict &&
894       (N.getOpcode() == ISD::STRICT_FSETCC ||
895        N.getOpcode() == ISD::STRICT_FSETCCS)) {
896     LHS = N.getOperand(1);
897     RHS = N.getOperand(2);
898     CC  = N.getOperand(3);
899     return true;
900   }
901 
902   if (N.getOpcode() != ISD::SELECT_CC ||
903       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
904       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
905     return false;
906 
907   if (TLI.getBooleanContents(N.getValueType()) ==
908       TargetLowering::UndefinedBooleanContent)
909     return false;
910 
911   LHS = N.getOperand(0);
912   RHS = N.getOperand(1);
913   CC  = N.getOperand(4);
914   return true;
915 }
916 
917 /// Return true if this is a SetCC-equivalent operation with only one use.
918 /// If this is true, it allows the users to invert the operation for free when
919 /// it is profitable to do so.
920 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
921   SDValue N0, N1, N2;
922   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
923     return true;
924   return false;
925 }
926 
927 // Returns the SDNode if it is a constant float BuildVector
928 // or constant float.
929 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
930   if (isa<ConstantFPSDNode>(N))
931     return N.getNode();
932   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
933     return N.getNode();
934   return nullptr;
935 }
936 
937 // Determines if it is a constant integer or a build vector of constant
938 // integers (and undefs).
939 // Do not permit build vector implicit truncation.
940 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
941   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
942     return !(Const->isOpaque() && NoOpaques);
943   if (N.getOpcode() != ISD::BUILD_VECTOR)
944     return false;
945   unsigned BitWidth = N.getScalarValueSizeInBits();
946   for (const SDValue &Op : N->op_values()) {
947     if (Op.isUndef())
948       continue;
949     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
950     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
951         (Const->isOpaque() && NoOpaques))
952       return false;
953   }
954   return true;
955 }
956 
957 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
958 // undef's.
959 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
960   if (V.getOpcode() != ISD::BUILD_VECTOR)
961     return false;
962   return isConstantOrConstantVector(V, NoOpaques) ||
963          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
964 }
965 
// Return true if an indexed load's index computation may be split off: the
// MaySplitLoadIndex option must be enabled and the index operand (operand 2)
// must not be an opaque TargetConstant, which could not be rematerialized
// separately.
static bool canSplitIdx(LoadSDNode *LD) {
  return MaySplitLoadIndex &&
         (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
}
972 
/// Return true if reassociating (Opc N0, N1) would fold two constant offsets
/// into a combined offset that no longer fits the legal addressing mode of a
/// load/store that uses the inner add as its address -- i.e. the rewrite
/// would break an addressing-mode pattern.
bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                             const SDLoc &DL,
                                                             SDValue N0,
                                                             SDValue N1) {
  // Currently this only tries to ensure we don't undo the GEP splits done by
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
  // we check if the following transformation would be problematic:
  // (load/store (add, (add, x, offset1), offset2)) ->
  // (load/store (add, x, offset1+offset2)).

  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
    return false;

  // If this is the inner add's only use, it disappears entirely after the
  // reassociation, so there is no addressing-mode pattern to protect.
  if (N0.hasOneUse())
    return false;

  // Both added values must be constants for the offsets to fold.
  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  auto *C2 = dyn_cast<ConstantSDNode>(N1);
  if (!C1 || !C2)
    return false;

  const APInt &C1APIntVal = C1->getAPIntValue();
  const APInt &C2APIntVal = C2->getAPIntValue();
  // Offsets wider than 64 bits cannot be represented in AddrMode::BaseOffs
  // (an int64_t), so conservatively allow the reassociation.
  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
    return false;

  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
  if (CombinedValueIntVal.getBitWidth() > 64)
    return false;
  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

  // Check every memory access whose address is the inner add.
  for (SDNode *Node : N0->uses()) {
    auto LoadStore = dyn_cast<MemSDNode>(Node);
    if (LoadStore) {
      // Is x[offset2] already not a legal addressing mode? If so then
      // reassociating the constants breaks nothing (we test offset2 because
      // that's the one we hope to fold into the load or store).
      TargetLoweringBase::AddrMode AM;
      AM.HasBaseReg = true;
      AM.BaseOffs = C2APIntVal.getSExtValue();
      EVT VT = LoadStore->getMemoryVT();
      unsigned AS = LoadStore->getAddressSpace();
      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        continue;

      // Would x[offset1+offset2] still be a legal addressing mode?
      AM.BaseOffs = CombinedValue;
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        return true;
    }
  }

  return false;
}
1028 
// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
// Returns the reassociated value, or a null SDValue if no rewrite applies.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                               SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();

  // Only (Opc (Opc x, c1), y) shapes are handled.
  if (N0.getOpcode() != Opc)
    return SDValue();

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
    if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
      if (SDValue OpNode =
              DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
      // Constant folding failed (e.g. opaque constants); give up rather than
      // build an unfolded constant-only node.
      return SDValue();
    }
    if (N0.hasOneUse()) {
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
      //              iff (op x, c1) has one use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
      if (!OpNode.getNode())
        return SDValue();
      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
    }
  }
  return SDValue();
}
1057 
1058 // Try to reassociate commutative binops.
1059 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1060                                     SDValue N1, SDNodeFlags Flags) {
1061   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1062 
1063   // Floating-point reassociation is not allowed without loose FP math.
1064   if (N0.getValueType().isFloatingPoint() ||
1065       N1.getValueType().isFloatingPoint())
1066     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1067       return SDValue();
1068 
1069   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1070     return Combined;
1071   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1072     return Combined;
1073   return SDValue();
1074 }
1075 
/// Replace all \p NumTo result values of \p N with the corresponding entries
/// of \p To. When \p AddTo is set, the replacement nodes and their users are
/// queued for further combining. \p N itself is deleted if the replacement
/// left it without uses. Returns SDValue(N, 0) so visit functions can report
/// a change.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  // Type-check every replacement value; the loop body is only an assert,
  // so this is a no-op in release builds.
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // DeadNodes keeps the worklist consistent with deletions triggered by RAUW.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
1107 
/// Commit a replacement recorded by TargetLowering (TLO.Old -> TLO.New) into
/// the DAG while keeping the combiner worklist consistent.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklistWithUsers(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
1130 
1131 /// Check the specified integer node value to see if it can be simplified or if
1132 /// things it uses can be simplified by bit propagation. If so, return true.
1133 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1134                                        const APInt &DemandedElts,
1135                                        bool AssumeSingleUse) {
1136   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1137   KnownBits Known;
1138   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1139                                 AssumeSingleUse))
1140     return false;
1141 
1142   // Revisit the node.
1143   AddToWorklist(Op.getNode());
1144 
1145   CommitTargetLoweringOpt(TLO);
1146   return true;
1147 }
1148 
1149 /// Check the specified vector node value to see if it can be simplified or
1150 /// if things it uses can be simplified as it only uses some of the elements.
1151 /// If so, return true.
1152 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1153                                              const APInt &DemandedElts,
1154                                              bool AssumeSingleUse) {
1155   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1156   APInt KnownUndef, KnownZero;
1157   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1158                                       TLO, 0, AssumeSingleUse))
1159     return false;
1160 
1161   // Revisit the node.
1162   AddToWorklist(Op.getNode());
1163 
1164   CommitTargetLoweringOpt(TLO);
1165   return true;
1166 }
1167 
/// Replace all uses of \p Load with the promoted load \p ExtLoad: value uses
/// are rewritten to a truncate of ExtLoad's value back to the original type,
/// and chain uses are rewired to ExtLoad's chain. The original load is then
/// deleted.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Value 0 is the loaded value, value 1 is the chain.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
1181 
/// Promote \p Op to the wider type \p PVT, returning the promoted value or a
/// null SDValue on failure. \p Replace is set to true when the result is a
/// freshly created extending load whose original load must still be replaced
/// by the caller (via ReplaceLoadWithPromotedLoad).
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    // Re-emit the load as an extending load of the promoted type, keeping
    // the original extension kind; plain (non-extending) loads become
    // EXTLOAD, i.e. any-extend.
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Keep the AssertSext wrapper, promoting its input with sign-preserving
    // semantics so the assertion still holds at the wider type.
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    // Same as above, but with zero-preserving semantics.
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    // Byte-sized constants are sign-extended, all others zero-extended.
    // NOTE(review): the rationale for the byte-sized special case is not
    // visible here -- confirm against the targets' promotion expectations.
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  // Generic fallback: any-extend the value, if the target supports it.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1218 
/// Promote \p Op to \p PVT and then re-establish the sign bits of the old
/// type via SIGN_EXTEND_INREG, yielding a correctly sign-extended PVT value.
/// Returns a null SDValue if promotion is not possible.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  // Bail out if the target cannot re-establish the sign bits afterwards.
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // If promotion created a replacement extending load, rewire the old load's
  // uses to it now.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}
1235 
/// Promote \p Op to \p PVT and then clear the bits above the old type's
/// width (zero-extend-in-register), yielding a correctly zero-extended PVT
/// value. Returns a null SDValue if promotion is not possible.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // If promotion created a replacement extending load, rewire the old load's
  // uses to it now.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}
1249 
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
/// Returns Op on success (its uses have been combined away) or a null SDValue.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer operations are candidates for promotion.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    // Promote both operands to PVT; ReplaceN is set when a new extending
    // load was created whose original load must be replaced below.
    // NOTE(review): PromoteOperand can return a null SDValue (e.g. when
    // ANY_EXTEND of PVT is not legal); presumably targets returning true
    // from IsDesirableToPromoteOp guarantee success here -- confirm.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Perform the operation at the wider type, then truncate back.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need additional
    // replacements if there are additional uses.
    // Note: We are checking uses of the *nodes* (SDNode) rather than values
    //       (SDValue) here because the node may reference multiple values
    //       (for example, the chain value of a load node).
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}
1317 
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
/// Only the shifted value (operand 0) is promoted; the shift amount
/// (operand 1) is used unchanged.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer shifts are candidates for promotion.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    // The promotion must match the shift's semantics: arithmetic right
    // shifts consume sign bits (sign-extend), logical right shifts consume
    // zero bits (zero-extend), and other shifts (SHL) accept any extension.
    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    // Shift at the wider type, then truncate back to the original type.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted: the replacement above may have recursively
    // replaced (and deleted) Op itself, in which case RV must not be
    // reported as a combine result for it.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1369 
1370 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1371   if (!LegalOperations)
1372     return SDValue();
1373 
1374   EVT VT = Op.getValueType();
1375   if (VT.isVector() || !VT.isInteger())
1376     return SDValue();
1377 
1378   // If operation type is 'undesirable', e.g. i16 on x86, consider
1379   // promoting it.
1380   unsigned Opc = Op.getOpcode();
1381   if (TLI.isTypeDesirableForOp(Opc, VT))
1382     return SDValue();
1383 
1384   EVT PVT = VT;
1385   // Consult target whether it is a good idea to promote this operation and
1386   // what's the right type to promote it to.
1387   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1388     assert(PVT != VT && "Don't know what type to promote to!");
1389     // fold (aext (aext x)) -> (aext x)
1390     // fold (aext (zext x)) -> (zext x)
1391     // fold (aext (sext x)) -> (sext x)
1392     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1393     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1394   }
1395   return SDValue();
1396 }
1397 
/// Promote an unindexed scalar-integer load to a more desirable type if the
/// target requests it: the load is re-emitted as an extending load of the
/// promoted type and all value uses are rewritten to a truncate of it.
/// \return true if the load was replaced.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // Preserve the extension kind; plain (non-extending) loads become
    // EXTLOAD, i.e. any-extend.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    // Value 0 is the loaded value, value 1 is the chain.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1443 
1444 /// Recursively delete a node which has no uses and any operands for
1445 /// which it is the only use.
1446 ///
1447 /// Note that this both deletes the nodes and removes them from the worklist.
1448 /// It also adds any nodes who have had a user deleted to the worklist as they
1449 /// may now have only one use and subject to other combines.
1450 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1451   if (!N->use_empty())
1452     return false;
1453 
1454   SmallSetVector<SDNode *, 16> Nodes;
1455   Nodes.insert(N);
1456   do {
1457     N = Nodes.pop_back_val();
1458     if (!N)
1459       continue;
1460 
1461     if (N->use_empty()) {
1462       for (const SDValue &ChildN : N->op_values())
1463         Nodes.insert(ChildN.getNode());
1464 
1465       removeFromWorklist(N);
1466       DAG.DeleteNode(N);
1467     } else {
1468       AddToWorklist(N);
1469     }
1470   } while (!Nodes.empty());
1471   return true;
1472 }
1473 
1474 //===----------------------------------------------------------------------===//
1475 //  Main DAG Combiner implementation
1476 //===----------------------------------------------------------------------===//
1477 
/// Main driver loop: seed the worklist with every node in the DAG, then
/// repeatedly pull nodes off and attempt to combine each one until the
/// worklist is exhausted.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // Record the current legalization stage so that the various visit routines
  // know which operations/types are legal to produce.
  Level = AtLevel;
  LegalDAG = Level >= AfterLegalizeDAG;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // From here on, any node the DAG creates is automatically added to the
  // worklist (RAII listener).
  WorklistInserter AddNodes(*this);

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (LegalDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes)
        AddToWorklistWithUsers(LN);

      // Legalization replaced N entirely; the replacements are already queued.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // No combine applied; move on to the next worklist entry.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    // Transfer all uses of N to the replacement. A mismatched value count is
    // only legal for single-result nodes (value 0 maps to RV).
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1572 
/// Dispatch to the opcode-specific generic combine routine for \p N.
/// Returns the replacement value, or an empty SDValue if no generic combine
/// applies to this opcode.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT:            return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT:            return visitSUBSAT(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SADDO:
  case ISD::UADDO:              return visitADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::SSUBO:
  case ISD::USUBO:              return visitSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:         return visitMULFIX(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:
  case ISD::UMULO:              return visitMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR:               return visitFunnelShift(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::AssertAlign:        return visitAssertAlign(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::FPOW:               return visitFPOW(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FMINIMUM:           return visitFMINIMUM(N);
  case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  case ISD::FREEZE:             return visitFREEZE(N);
  // All vector reductions share one visitor.
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
  }
  return SDValue();
}
1708 
/// Attempt all combine strategies on \p N, in order: generic visit routines,
/// target-specific combines, integer-operation promotion, and finally CSE
/// against an existing commuted copy of the node. Returns the replacement
/// value, or an empty SDValue if nothing changed.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV;
  if (!DisableGenericCombines)
    RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    // Target-specific opcodes (>= BUILTIN_OP_END) always go to the target;
    // generic opcodes only if the target registered a combine for them.
    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      // PromoteLoad rewrites uses itself; returning N signals "CombineTo-style
      // replacement already done" to the caller.
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try to eliminate it if the commuted
  // version is already present in the DAG.
  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}
1778 
1779 /// Given a node, return its input chain if it has one, otherwise return a null
1780 /// sd operand.
1781 static SDValue getInputChainForNode(SDNode *N) {
1782   if (unsigned NumOps = N->getNumOperands()) {
1783     if (N->getOperand(0).getValueType() == MVT::Other)
1784       return N->getOperand(0);
1785     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1786       return N->getOperand(NumOps-1);
1787     for (unsigned i = 1; i < NumOps-1; ++i)
1788       if (N->getOperand(i).getValueType() == MVT::Other)
1789         return N->getOperand(i);
1790   }
1791   return SDValue();
1792 }
1793 
/// Combine a TokenFactor node: inline nested single-use token factors, drop
/// redundant EntryToken and duplicate operands, and prune operands that are
/// already reachable transitively through another operand's chain.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  // If the sole user is a token factor, we should make sure we have a
  // chance to merge them together. This prevents TF chains from inhibiting
  // optimizations.
  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
    AddToWorklist(*(N->use_begin()));

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  TFs grows as new token factors are
  // encountered for inlining.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    // Limit number of nodes to inline, to avoid quadratic compile times.
    // We have to add the outstanding Token Factors to Ops, otherwise we might
    // drop Ops from the resulting Token Factors.
    if (Ops.size() > TokenFactorInlineLimit) {
      for (unsigned j = i; j < TFs.size(); j++)
        Ops.emplace_back(TFs[j], 0);
      // Drop unprocessed Token Factors from TFs, so we do not add them to the
      // combiner worklist later.
      TFs.resize(i);
      break;
    }

    SDNode *TF = TFs[i];
    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Re-visit inlined Token Factors, to clean them up in case they have been
  // removed. Skip the first Token Factor, as this is the current node.
  for (unsigned i = 1, e = TFs.size(); i < e; i++)
    AddToWorklist(TFs[i]);

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // NOTE: this local lambda intentionally shadows DAGCombiner::AddToWorklist
  // within this scope; it queues chain nodes for the pruning search above.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // Cap the search at 1024 worklist entries to bound compile time.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops for the search to be worthwhile.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END:
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only Ops that were not reached through another Op's chain.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
      } else {
        Result = DAG.getTokenFactor(SDLoc(N), Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
1977 
1978 /// MERGE_VALUES can always be eliminated.
1979 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1980   WorklistRemover DeadNodes(*this);
1981   // Replacing results may cause a different MERGE_VALUES to suddenly
1982   // be CSE'd with N, and carry its uses with it. Iterate until no
1983   // uses remain, to ensure that the node can be safely deleted.
1984   // First add the users of this node to the work list so that they
1985   // can be tried again once they have new operands.
1986   AddUsersToWorklist(N);
1987   do {
1988     // Do as a single replacement to avoid rewalking use lists.
1989     SmallVector<SDValue, 8> Ops;
1990     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1991       Ops.push_back(N->getOperand(i));
1992     DAG.ReplaceAllUsesWith(N, Ops.data());
1993   } while (!N->use_empty());
1994   deleteAndRecombine(N);
1995   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1996 }
1997 
1998 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1999 /// ConstantSDNode pointer else nullptr.
2000 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2001   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2002   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2003 }
2004 
/// Try to fold a binary operator into a single-use select-of-constants
/// operand by applying the binop to each select arm:
///   binop (select Cond, CT, CF), CBO --> select Cond, (binop CT, CBO), (binop CF, CBO)
/// Returns the new select, or an empty SDValue if the fold doesn't apply.
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
         "Unexpected binary operator");

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    // The select may be on either side of the binop; try the other operand.
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  // Both select arms must be constants (or constant vectors/FP constants).
  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      !isConstantFPBuildVectorOrConstantFP(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      !isConstantFPBuildVectorOrConstantFP(CF))
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  auto BinOpcode = BO->getOpcode();
  bool CanFoldNonConst =
      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
      (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
      (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));

  // The other (non-select) operand of the binop.
  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      !isConstantFPBuildVectorOrConstantFP(CBO))
    return SDValue();

  EVT VT = Sel.getValueType();

  // In case of shift value and shift amount may have different VT. For instance
  // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
  // swapped operands and value types do not match. NB: x86 is fine if operands
  // are not swapped with shift amount VT being not bigger than shifted value.
  // TODO: that is possible to check for a shift operation, correct VTs and
  // still perform optimization on x86 if needed.
  if (SelOpNo && VT != CBO.getValueType())
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  SDLoc DL(Sel);
  // Preserve operand order (CBO on the left iff the select was operand 1).
  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
  // Bail unless the arm constant-folded (or undef, or the non-const case).
  if (!CanFoldNonConst && !NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCT))
    return SDValue();

  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
  if (!CanFoldNonConst && !NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCF))
    return SDValue();

  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
  // Carry the binop's flags (e.g. nsw/nuw/fast-math) onto the new select.
  SelectOp->setFlags(BO->getFlags());
  return SelectOp;
}
2082 
/// Fold an add/sub of a constant with a zexted, inverted low-bit test into an
/// add/sub of the low bit itself with an adjusted constant:
///   add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
///   sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // Match a constant operand and a zext operand for the math instruction:
  // add Z, C
  // sub C, Z
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
  auto *CN = dyn_cast<ConstantSDNode>(C);
  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  // Match the zext operand as a setcc of a boolean.
  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
      Z.getOperand(0).getValueType() != MVT::i1)
    return SDValue();

  // Match the compare as: setcc (X & 1), 0, eq.
  SDValue SetCC = Z.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
      SetCC.getOperand(0).getOpcode() != ISD::AND ||
      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
    return SDValue();

  // We are adding/subtracting a constant and an inverted low bit. Turn that
  // into a subtract/add of the low bit with incremented/decremented constant:
  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
  EVT VT = C.getValueType();
  SDLoc DL(N);
  // LowBit is (X & 1), zero-extended (or truncated) to the result type.
  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}
2121 
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
/// a shift and add with a different constant:
///   add (srl (not X), BW-1), C --> add (sra X, BW-1), (C + 1)
///   sub C, (srl (not X), BW-1) --> add (srl X, BW-1), (C - 1)
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // We need a constant operand for the add/sub, and the other operand is a
  // logical shift right: add (srl), C or sub C, (srl).
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
  if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
      ShiftOp.getOpcode() != ISD::SRL)
    return SDValue();

  // The shift must be of a 'not' value.
  SDValue Not = ShiftOp.getOperand(0);
  if (!Not.hasOneUse() || !isBitwiseNot(Not))
    return SDValue();

  // The shift must be moving the sign bit to the least-significant-bit.
  EVT VT = ShiftOp.getValueType();
  SDValue ShAmt = ShiftOp.getOperand(1);
  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
    return SDValue();

  // Eliminate the 'not' by adjusting the shift and add/sub constant:
  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
  SDLoc DL(N);
  // For add, the inverted sign-test becomes an arithmetic shift (all-ones or
  // zero); for sub, keep the logical shift (one or zero).
  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
  // Fold C+1 (add) or C-1 (sub) into a fresh constant; bail on failure.
  if (SDValue NewC =
          DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
                                     {ConstantOp, DAG.getConstant(1, DL, VT)}))
    return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
  return SDValue();
}
2161 
/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
/// are no common bits set in the operands).
///
/// NOTE: the folds below are tried in order; earlier, cheaper folds take
/// precedence over later structural rewrites.
SDValue DAGCombiner::visitADDLike(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((A-c1)+c2) -> (A+(c2-c1))
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
      // Both operands are non-opaque constants, so the fold cannot fail.
      SDValue Sub =
          DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
      assert(Sub && "Constant folding failed");
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
    }

    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      SDValue Add =
          DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
      assert(Add && "Constant folding failed");
      return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneOrOneSplat(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
    // equivalent to (add x, c0).
    if (N0.getOpcode() == ISD::OR &&
        isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
                                                    {N1, N0.getOperand(1)}))
        return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  // If an operand is a select of constants, try to push the add into it.
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
    if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
      return RADD;
  }
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold ((A-B)+(C-A)) -> (C-B)
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0.getOperand(0) == N1.getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N0.getOperand(1));

  // fold ((A-B)+(B-C)) -> (A-C)
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0.getOperand(1) == N1.getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N1.getOperand(1));

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // fold (add (umax X, C), -C) --> (usubsat X, C)
  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
    // Match when the umax constant is the negation of the add constant
    // (per element; a pair of undefs also matches when AllowUndefs is set).
    auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
      return (!Max && !Op) ||
             (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
    };
    if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
                                  /*AllowUndefs*/ true))
      return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
                         N0.getOperand(1));
  }

  // Simplify the node based on which bits of the result are demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (isOneOrOneSplat(N1)) {
    // fold (add (xor a, -1), 1) -> (sub 0, a)
    if (isBitwiseNot(N0))
      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                         N0.getOperand(0));

    // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
    if (N0.getOpcode() == ISD::ADD ||
        N0.getOpcode() == ISD::UADDO ||
        N0.getOpcode() == ISD::SADDO) {
      SDValue A, Xor;

      if (isBitwiseNot(N0.getOperand(0))) {
        A = N0.getOperand(1);
        Xor = N0.getOperand(0);
      } else if (isBitwiseNot(N0.getOperand(1))) {
        A = N0.getOperand(0);
        Xor = N0.getOperand(1);
      }

      // Xor stays null when neither inner operand is a bitwise-not.
      if (Xor)
        return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
    }

    // Look for:
    //   add (add x, y), 1
    // And if the target does not like this form then turn into:
    //   sub y, (xor x, -1)
    if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
        N0.getOpcode() == ISD::ADD) {
      SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                                DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
    }
  }

  // (x - y) + -1  ->  add (xor y, -1), x
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isAllOnesOrAllOnesSplat(N1)) {
    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
  }

  // Try the remaining add-like folds with both operand orders.
  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
    return Combined;

  return SDValue();
}
2382 
2383 SDValue DAGCombiner::visitADD(SDNode *N) {
2384   SDValue N0 = N->getOperand(0);
2385   SDValue N1 = N->getOperand(1);
2386   EVT VT = N0.getValueType();
2387   SDLoc DL(N);
2388 
2389   if (SDValue Combined = visitADDLike(N))
2390     return Combined;
2391 
2392   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2393     return V;
2394 
2395   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2396     return V;
2397 
2398   // fold (a+b) -> (a|b) iff a and b share no bits.
2399   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2400       DAG.haveNoCommonBitsSet(N0, N1))
2401     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2402 
2403   // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2404   if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2405     APInt C0 = N0->getConstantOperandAPInt(0);
2406     APInt C1 = N1->getConstantOperandAPInt(0);
2407     return DAG.getVScale(DL, VT, C0 + C1);
2408   }
2409 
2410   // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2411   if ((N0.getOpcode() == ISD::ADD) &&
2412       (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2413       (N1.getOpcode() == ISD::VSCALE)) {
2414     auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2415     auto VS1 = N1->getConstantOperandAPInt(0);
2416     auto VS = DAG.getVScale(DL, VT, VS0 + VS1);
2417     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2418   }
2419 
2420   return SDValue();
2421 }
2422 
2423 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2424   unsigned Opcode = N->getOpcode();
2425   SDValue N0 = N->getOperand(0);
2426   SDValue N1 = N->getOperand(1);
2427   EVT VT = N0.getValueType();
2428   SDLoc DL(N);
2429 
2430   // fold vector ops
2431   if (VT.isVector()) {
2432     // TODO SimplifyVBinOp
2433 
2434     // fold (add_sat x, 0) -> x, vector edition
2435     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2436       return N0;
2437     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2438       return N1;
2439   }
2440 
2441   // fold (add_sat x, undef) -> -1
2442   if (N0.isUndef() || N1.isUndef())
2443     return DAG.getAllOnesConstant(DL, VT);
2444 
2445   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2446     // canonicalize constant to RHS
2447     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2448       return DAG.getNode(Opcode, DL, VT, N1, N0);
2449     // fold (add_sat c1, c2) -> c3
2450     return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2451   }
2452 
2453   // fold (add_sat x, 0) -> x
2454   if (isNullConstant(N1))
2455     return N0;
2456 
2457   // If it cannot overflow, transform into an add.
2458   if (Opcode == ISD::UADDSAT)
2459     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2460       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2461 
2462   return SDValue();
2463 }
2464 
2465 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2466   bool Masked = false;
2467 
2468   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2469   while (true) {
2470     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2471       V = V.getOperand(0);
2472       continue;
2473     }
2474 
2475     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2476       Masked = true;
2477       V = V.getOperand(0);
2478       continue;
2479     }
2480 
2481     break;
2482   }
2483 
2484   // If this is not a carry, return.
2485   if (V.getResNo() != 1)
2486     return SDValue();
2487 
2488   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2489       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2490     return SDValue();
2491 
2492   EVT VT = V.getNode()->getValueType(0);
2493   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2494     return SDValue();
2495 
2496   // If the result is masked, then no matter what kind of bool it is we can
2497   // return. If it isn't, then we need to make sure the bool type is either 0 or
2498   // 1 and not other values.
2499   if (Masked ||
2500       TLI.getBooleanContents(V.getValueType()) ==
2501           TargetLoweringBase::ZeroOrOneBooleanContent)
2502     return V;
2503 
2504   return SDValue();
2505 }
2506 
2507 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2508 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2509 /// the opcode and bypass the mask operation.
2510 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2511                                  SelectionDAG &DAG, const SDLoc &DL) {
2512   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2513     return SDValue();
2514 
2515   EVT VT = N0.getValueType();
2516   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2517     return SDValue();
2518 
2519   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2520   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2521   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2522 }
2523 
/// Helper for doing combines based on N0 and N1 being added to each other.
/// The caller invokes this twice with the operands swapped, so each fold here
/// only needs to match one operand order. LocReference supplies the debug
/// location for any nodes created.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
                                          SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
    return V;

  // Look for:
  //   add (add x, 1), y
  // And if the target does not like this form then turn into:
  //   sub y, (xor x, -1)
  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
      N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
    SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                              DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
  }

  // Hoist one-use subtraction by non-opaque constant:
  //   (x - C) + y  ->  (x + y) - C
  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
  }
  // Hoist one-use subtraction from non-opaque constant:
  //   (C - x) + y  ->  (y - x) + C
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
  }

  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
  // rather than 'add 0/-1' (the zext should get folded).
  // add (sext i1 Y), X --> sub X, (zext i1 Y)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
      TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}
2603 
2604 SDValue DAGCombiner::visitADDC(SDNode *N) {
2605   SDValue N0 = N->getOperand(0);
2606   SDValue N1 = N->getOperand(1);
2607   EVT VT = N0.getValueType();
2608   SDLoc DL(N);
2609 
2610   // If the flag result is dead, turn this into an ADD.
2611   if (!N->hasAnyUseOfValue(1))
2612     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2613                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2614 
2615   // canonicalize constant to RHS.
2616   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2617   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2618   if (N0C && !N1C)
2619     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2620 
2621   // fold (addc x, 0) -> x + no carry out
2622   if (isNullConstant(N1))
2623     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2624                                         DL, MVT::Glue));
2625 
2626   // If it cannot overflow, transform into an add.
2627   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2628     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2629                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2630 
2631   return SDValue();
2632 }
2633 
2634 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2635                            SelectionDAG &DAG, const TargetLowering &TLI) {
2636   EVT VT = V.getValueType();
2637 
2638   SDValue Cst;
2639   switch (TLI.getBooleanContents(VT)) {
2640   case TargetLowering::ZeroOrOneBooleanContent:
2641   case TargetLowering::UndefinedBooleanContent:
2642     Cst = DAG.getConstant(1, DL, VT);
2643     break;
2644   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2645     Cst = DAG.getAllOnesConstant(DL, VT);
2646     break;
2647   }
2648 
2649   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2650 }
2651 
2652 /**
2653  * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2654  * then the flip also occurs if computing the inverse is the same cost.
2655  * This function returns an empty SDValue in case it cannot flip the boolean
2656  * without increasing the cost of the computation. If you want to flip a boolean
2657  * no matter what, use flipBoolean.
2658  */
2659 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2660                                   const TargetLowering &TLI,
2661                                   bool Force) {
2662   if (Force && isa<ConstantSDNode>(V))
2663     return flipBoolean(V, SDLoc(V), DAG, TLI);
2664 
2665   if (V.getOpcode() != ISD::XOR)
2666     return SDValue();
2667 
2668   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2669   if (!Const)
2670     return SDValue();
2671 
2672   EVT VT = V.getValueType();
2673 
2674   bool IsFlip = false;
2675   switch(TLI.getBooleanContents(VT)) {
2676     case TargetLowering::ZeroOrOneBooleanContent:
2677       IsFlip = Const->isOne();
2678       break;
2679     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2680       IsFlip = Const->isAllOnesValue();
2681       break;
2682     case TargetLowering::UndefinedBooleanContent:
2683       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2684       break;
2685   }
2686 
2687   if (IsFlip)
2688     return V.getOperand(0);
2689   if (Force)
2690     return flipBoolean(V, SDLoc(V), DAG, TLI);
2691   return SDValue();
2692 }
2693 
// Combine for both ISD::UADDO and ISD::SADDO (add with overflow flag).
SDValue DAGCombiner::visitADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SADDO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);

  // fold (addo x, 0) -> x + no carry out
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // The folds below are valid for the unsigned flavor only.
  if (!IsSigned) {
    // If it cannot overflow, transform into an add.
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
      return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                       DAG.getConstant(0, DL, CarryVT));

    // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
    // (~a + 1 == 0 - a; the sub's borrow is the inverse of the add's carry.)
    if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
      SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                                DAG.getConstant(0, DL, VT), N0.getOperand(0));
      return CombineTo(N, Sub,
                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
    }

    // Try the uaddo-specific folds with both operand orders.
    if (SDValue Combined = visitUADDOLike(N0, N1, N))
      return Combined;

    if (SDValue Combined = visitUADDOLike(N1, N0, N))
      return Combined;
  }

  return SDValue();
}
2740 
// Commutative helper for visitADDO (unsigned flavor): called twice with the
// operands swapped. Scalar types only.
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  // If Y + 1 cannot overflow.
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
    SDValue Y = N1.getOperand(0);
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
    if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
                         N1.getOperand(2));
  }

  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), VT), Carry);

  return SDValue();
}
2764 
2765 SDValue DAGCombiner::visitADDE(SDNode *N) {
2766   SDValue N0 = N->getOperand(0);
2767   SDValue N1 = N->getOperand(1);
2768   SDValue CarryIn = N->getOperand(2);
2769 
2770   // canonicalize constant to RHS
2771   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2772   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2773   if (N0C && !N1C)
2774     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2775                        N1, N0, CarryIn);
2776 
2777   // fold (adde x, y, false) -> (addc x, y)
2778   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2779     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2780 
2781   return SDValue();
2782 }
2783 
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
  }

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  // The sum is just the carry-in widened to VT; it can never carry out.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    EVT CarryVT = CarryIn.getValueType();
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  // Try the addcarry-specific folds with both operand orders.
  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}
2822 
2823 /**
2824  * If we are facing some sort of diamond carry propapagtion pattern try to
2825  * break it up to generate something like:
2826  *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2827  *
2828  * The end result is usually an increase in operation required, but because the
2829  * carry is now linearized, other tranforms can kick in and optimize the DAG.
2830  *
2831  * Patterns typically look something like
2832  *            (uaddo A, B)
2833  *             /       \
2834  *          Carry      Sum
2835  *            |          \
2836  *            | (addcarry *, 0, Z)
2837  *            |       /
2838  *             \   Carry
2839  *              |   /
2840  * (addcarry X, *, *)
2841  *
2842  * But numerous variation exist. Our goal is to identify A, B, X and Z and
2843  * produce a combine with a single path for carry propagation.
2844  */
2845 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2846                                       SDValue X, SDValue Carry0, SDValue Carry1,
2847                                       SDNode *N) {
2848   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2849     return SDValue();
2850   if (Carry1.getOpcode() != ISD::UADDO)
2851     return SDValue();
2852 
2853   SDValue Z;
2854 
2855   /**
2856    * First look for a suitable Z. It will present itself in the form of
2857    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2858    */
2859   if (Carry0.getOpcode() == ISD::ADDCARRY &&
2860       isNullConstant(Carry0.getOperand(1))) {
2861     Z = Carry0.getOperand(2);
2862   } else if (Carry0.getOpcode() == ISD::UADDO &&
2863              isOneConstant(Carry0.getOperand(1))) {
2864     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2865     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2866   } else {
2867     // We couldn't find a suitable Z.
2868     return SDValue();
2869   }
2870 
2871 
2872   auto cancelDiamond = [&](SDValue A,SDValue B) {
2873     SDLoc DL(N);
2874     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2875     Combiner.AddToWorklist(NewY.getNode());
2876     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2877                        DAG.getConstant(0, DL, X.getValueType()),
2878                        NewY.getValue(1));
2879   };
2880 
2881   /**
2882    *      (uaddo A, B)
2883    *           |
2884    *          Sum
2885    *           |
2886    * (addcarry *, 0, Z)
2887    */
2888   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2889     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2890   }
2891 
2892   /**
2893    * (addcarry A, 0, Z)
2894    *         |
2895    *        Sum
2896    *         |
2897    *  (uaddo *, B)
2898    */
2899   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2900     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2901   }
2902 
2903   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2904     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2905   }
2906 
2907   return SDValue();
2908 }
2909 
// If we are facing some sort of diamond carry/borrow in/out pattern try to
// match patterns like:
//
//          (uaddo A, B)            CarryIn
//            |  \                     |
//            |   \                    |
//    PartialSum   PartialCarryOutX   /
//            |        |             /
//            |    ____|____________/
//            |   /    |
//     (uaddo *, *)    \________
//       |  \                   \
//       |   \                   |
//       |    PartialCarryOutY   |
//       |        \              |
//       |         \            /
//   AddCarrySum    |    ______/
//                  |   /
//   CarryOut = (or *, *)
//
// And generate ADDCARRY (or SUBCARRY) with two result values:
//
//    {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
//
// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
// a single path for carry/borrow out propagation:
static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
                                   const TargetLowering &TLI, SDValue Carry0,
                                   SDValue Carry1, SDNode *N) {
  // Both inputs must be carry/borrow outputs (second result) of the same
  // unsigned overflow opcode.
  if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
    return SDValue();
  unsigned Opcode = Carry0.getOpcode();
  if (Opcode != Carry1.getOpcode())
    return SDValue();
  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
    return SDValue();

  // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
  // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
  // the above ASCII art.)
  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
      Carry1.getOperand(1) != Carry0.getValue(0))
    std::swap(Carry0, Carry1);
  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
      Carry1.getOperand(1) != Carry0.getValue(0))
    return SDValue();

  // The carry in value must be on the righthand side for subtraction.
  unsigned CarryInOperandNum =
      Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
    return SDValue();
  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);

  unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
    return SDValue();

  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
  // TODO: make getAsCarry() aware of how partial carries are merged.
  if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();
  CarryIn = CarryIn.getOperand(0);
  if (CarryIn.getValueType() != MVT::i1)
    return SDValue();

  SDLoc DL(N);
  SDValue Merged =
      DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
                  Carry0.getOperand(1), CarryIn);

  // Please note that because we have proven that the result of the UADDO/USUBO
  // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
  // therefore prove that if the first UADDO/USUBO overflows, the second
  // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
  // maximum value.
  //
  //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
  //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
  //
  // This is important because it means that OR and XOR can be used to merge
  // carry flags; and that AND can return a constant zero.
  //
  // TODO: match other operations that can merge flags (ADD, etc)
  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
  if (N->getOpcode() == ISD::AND)
    return DAG.getConstant(0, DL, MVT::i1);
  return Merged.getValue(1);
}
2999 
// Shared combine logic for ADDCARRY-like nodes: N computes N0 + N1 + CarryIn.
// Returns a replacement value, or a null SDValue if no fold applies. Callers
// are expected to try both operand orders since addition commutes.
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
  // Rationale: ~a == -a - 1, so ~a + b + c == b - a - (1 - c), which is
  // exactly (subcarry b, a, !c); the borrow out is the inverted carry out.
  if (isBitwiseNot(N0))
    if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
      SDLoc DL(N);
      SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
                                N0.getOperand(0), NotC);
      return CombineTo(N, Sub,
                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
    }

  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
  // or the dependency between the instructions.
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
        N0.getValue(1) != CarryIn)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry argument is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    // Because both are carries, Y and Z can be swapped.
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
      return R;
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
      return R;
  }

  return SDValue();
}
3038 
3039 // Since it may not be valid to emit a fold to zero for vector initializers
3040 // check if we can before folding.
3041 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3042                              SelectionDAG &DAG, bool LegalOperations) {
3043   if (!VT.isVector())
3044     return DAG.getConstant(0, DL, VT);
3045   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3046     return DAG.getConstant(0, DL, VT);
3047   return SDValue();
3048 }
3049 
// Combine (sub N0, N1). The folds below are attempted in order and the first
// one that matches wins; ordering is significant (e.g. constant folding must
// run before the (sub x, c) -> (add x, -c) canonicalization).
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

  // fold (sub c1, c2) -> c3
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
    return C;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Only a non-opaque constant may be negated/moved by the fold just below.
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // The folds in this block all handle negation: N is (sub 0, N1).
  if (isNullOrNullSplat(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesOrAllOnesSplat(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold (A - (0-B)) -> A+B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold (A+C1)-C2 -> A+(C1-C2)
  if (N0.getOpcode() == ISD::ADD &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue NewC =
        DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
    assert(NewC && "Constant folding failed");
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
  }

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
      assert(NewC && "Constant folding failed");
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold (A-C1)-C2 -> A-(C1+C2)
  if (N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue NewC =
        DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
    assert(NewC && "Constant folding failed");
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
  }

  // fold (c1-A)-c2 -> (c1-c2)-A
  if (N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
    SDValue NewC =
        DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
    assert(NewC && "Constant folding failed");
    return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold (A-(B-C)) -> A+(C-B)
  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
                                   N1.getOperand(0)));

  // A - (A & B)  ->  A & (~B)
  if (N1.getOpcode() == ISD::AND) {
    SDValue A = N1.getOperand(0);
    SDValue B = N1.getOperand(1);
    if (A != N0)
      std::swap(A, B);
    // Profitable when the AND is single-use (it disappears) or B is a
    // constant (~B folds to another constant).
    if (A == N0 &&
        (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
      SDValue InvB =
          DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::AND, DL, VT, A, InvB);
    }
  }

  // fold (X - (-Y * Z)) -> (X + (Y * Z))
  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
    if (N1.getOperand(0).getOpcode() == ISD::SUB &&
        isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0).getOperand(1),
                                N1.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
    if (N1.getOperand(1).getOpcode() == ISD::SUB &&
        isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0),
                                N1.getOperand(1).getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
  }

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
    return V;

  // (x - y) - 1  ->  add (xor y, -1), x
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
                              DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
  }

  // Look for:
  //   sub y, (xor x, -1)
  // And if the target does not like this form then turn into:
  //   add (add x, y), 1
  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
    return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
  }

  // Hoist one-use addition by non-opaque constant:
  //   (x + C) - y  ->  (x - y) + C
  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
  }
  // y - (x + C)  ->  (y - x) - C
  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
      isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
  }
  // (x - C) - y  ->  (x - y) - C
  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
  }
  // (C - x) - y  ->  C - (x + y)
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
  }

  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
  // rather than 'sub 0/1' (the sext should get folded).
  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
      N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
      TLI.getBooleanContents(VT) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent) {
    SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
    return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
  }

  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
      SDValue S0 = N1.getOperand(0);
      // The XOR must combine S0 (the shifted value) with the SRA itself; the
      // operands may appear in either order.
      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
      }
    }
  }

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  // canonicalize (sub X, (vscale * C)) to (add X,  (vscale * -C))
  if (N1.getOpcode() == ISD::VSCALE) {
    APInt IntVal = N1.getConstantOperandAPInt(0);
    return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
  }

  // Prefer an add for more folding potential and possibly better codegen:
  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
    SDValue ShAmt = N1.getOperand(1);
    ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
    if (ShAmtC &&
        ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
      SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
      return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
    }
  }

  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
    // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
    if (SDValue Carry = getAsCarry(TLI, N0)) {
      SDValue X = N1;
      SDValue Zero = DAG.getConstant(0, DL, VT);
      SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
                         Carry);
    }
  }

  return SDValue();
}
3375 
3376 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3377   SDValue N0 = N->getOperand(0);
3378   SDValue N1 = N->getOperand(1);
3379   EVT VT = N0.getValueType();
3380   SDLoc DL(N);
3381 
3382   // fold vector ops
3383   if (VT.isVector()) {
3384     // TODO SimplifyVBinOp
3385 
3386     // fold (sub_sat x, 0) -> x, vector edition
3387     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3388       return N0;
3389   }
3390 
3391   // fold (sub_sat x, undef) -> 0
3392   if (N0.isUndef() || N1.isUndef())
3393     return DAG.getConstant(0, DL, VT);
3394 
3395   // fold (sub_sat x, x) -> 0
3396   if (N0 == N1)
3397     return DAG.getConstant(0, DL, VT);
3398 
3399   // fold (sub_sat c1, c2) -> c3
3400   if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3401     return C;
3402 
3403   // fold (sub_sat x, 0) -> x
3404   if (isNullConstant(N1))
3405     return N0;
3406 
3407   return SDValue();
3408 }
3409 
3410 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3411   SDValue N0 = N->getOperand(0);
3412   SDValue N1 = N->getOperand(1);
3413   EVT VT = N0.getValueType();
3414   SDLoc DL(N);
3415 
3416   // If the flag result is dead, turn this into an SUB.
3417   if (!N->hasAnyUseOfValue(1))
3418     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3419                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3420 
3421   // fold (subc x, x) -> 0 + no borrow
3422   if (N0 == N1)
3423     return CombineTo(N, DAG.getConstant(0, DL, VT),
3424                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3425 
3426   // fold (subc x, 0) -> x + no borrow
3427   if (isNullConstant(N1))
3428     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3429 
3430   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3431   if (isAllOnesConstant(N0))
3432     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3433                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3434 
3435   return SDValue();
3436 }
3437 
3438 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3439   SDValue N0 = N->getOperand(0);
3440   SDValue N1 = N->getOperand(1);
3441   EVT VT = N0.getValueType();
3442   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3443 
3444   EVT CarryVT = N->getValueType(1);
3445   SDLoc DL(N);
3446 
3447   // If the flag result is dead, turn this into an SUB.
3448   if (!N->hasAnyUseOfValue(1))
3449     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3450                      DAG.getUNDEF(CarryVT));
3451 
3452   // fold (subo x, x) -> 0 + no borrow
3453   if (N0 == N1)
3454     return CombineTo(N, DAG.getConstant(0, DL, VT),
3455                      DAG.getConstant(0, DL, CarryVT));
3456 
3457   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3458 
3459   // fold (subox, c) -> (addo x, -c)
3460   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3461     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3462                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3463   }
3464 
3465   // fold (subo x, 0) -> x + no borrow
3466   if (isNullOrNullSplat(N1))
3467     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3468 
3469   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3470   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3471     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3472                      DAG.getConstant(0, DL, CarryVT));
3473 
3474   return SDValue();
3475 }
3476 
3477 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3478   SDValue N0 = N->getOperand(0);
3479   SDValue N1 = N->getOperand(1);
3480   SDValue CarryIn = N->getOperand(2);
3481 
3482   // fold (sube x, y, false) -> (subc x, y)
3483   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3484     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3485 
3486   return SDValue();
3487 }
3488 
3489 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3490   SDValue N0 = N->getOperand(0);
3491   SDValue N1 = N->getOperand(1);
3492   SDValue CarryIn = N->getOperand(2);
3493 
3494   // fold (subcarry x, y, false) -> (usubo x, y)
3495   if (isNullConstant(CarryIn)) {
3496     if (!LegalOperations ||
3497         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3498       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3499   }
3500 
3501   return SDValue();
3502 }
3503 
3504 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3505 // UMULFIXSAT here.
3506 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3507   SDValue N0 = N->getOperand(0);
3508   SDValue N1 = N->getOperand(1);
3509   SDValue Scale = N->getOperand(2);
3510   EVT VT = N0.getValueType();
3511 
3512   // fold (mulfix x, undef, scale) -> 0
3513   if (N0.isUndef() || N1.isUndef())
3514     return DAG.getConstant(0, SDLoc(N), VT);
3515 
3516   // Canonicalize constant to RHS (vector doesn't have to splat)
3517   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3518      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3519     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3520 
3521   // fold (mulfix x, 0, scale) -> 0
3522   if (isNullConstant(N1))
3523     return DAG.getConstant(0, SDLoc(N), VT);
3524 
3525   return SDValue();
3526 }
3527 
// Combine (mul N0, N1). Folds are attempted in order; the first match wins.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  APInt ConstValue1;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // For vectors, "constant" means a constant splat; the splat value lands
    // in ConstValue1.
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);

  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;

  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }

  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }

  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
  // mul x, (2^N + 1) --> add (shl x, N), x
  // mul x, (2^N - 1) --> sub (shl x, N), x
  // Examples: x * 33 --> (x << 5) + x
  //           x * 15 --> (x << 4) - x
  //           x * -33 --> -((x << 5) + x)
  //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
    // TODO: We could handle more general decomposition of any constant by
    //       having the target set a limit on number of ops and making a
    //       callback to determine that sequence (similar to sqrt expansion).
    unsigned MathOp = ISD::DELETED_NODE;
    APInt MulC = ConstValue1.abs();
    if ((MulC - 1).isPowerOf2())
      MathOp = ISD::ADD;
    else if ((MulC + 1).isPowerOf2())
      MathOp = ISD::SUB;

    if (MathOp != ISD::DELETED_NODE) {
      unsigned ShAmt =
          MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
      assert(ShAmt < VT.getScalarSizeInBits() &&
             "multiply-by-constant generated out of bounds shift");
      SDLoc DL(N);
      SDValue Shl =
          DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
      SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
      // A negative multiplier is handled by negating the shift+add/sub result.
      if (ConstValue1.isNegative())
        R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
      return R;
    }
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
  if (N0.getOpcode() == ISD::VSCALE)
    if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
      APInt C0 = N0.getConstantOperandAPInt(0);
      APInt C1 = NC1->getAPIntValue();
      return DAG.getVScale(SDLoc(N), VT, C0 * C1);
    }

  // reassociate mul
  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
    return RMUL;

  return SDValue();
}
3698 
3699 /// Return true if divmod libcall is available.
3700 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3701                                      const TargetLowering &TLI) {
3702   RTLIB::Libcall LC;
3703   EVT NodeType = Node->getValueType(0);
3704   if (!NodeType.isSimple())
3705     return false;
3706   switch (NodeType.getSimpleVT().SimpleTy) {
3707   default: return false; // No libcall for vector types.
3708   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
3709   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3710   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3711   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3712   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3713   }
3714 
3715   return TLI.getLibcallName(LC) != nullptr;
3716 }
3717 
/// Issue divrem if both quotient and remainder are needed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Walk all users of the dividend looking for matching div/rem/divrem nodes
  // over the same (Op0, Op1) pair and rewrite them all onto one DIVREM.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Skip Node itself, nodes already deleted by earlier CombineTo calls in
    // this loop, and dead users.
    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
        User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        // Lazily create (or reuse) the DIVREM node on first match.
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Quotient users take value 0 of the DIVREM, remainder users value 1.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
3788 
3789 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3790   SDValue N0 = N->getOperand(0);
3791   SDValue N1 = N->getOperand(1);
3792   EVT VT = N->getValueType(0);
3793   SDLoc DL(N);
3794 
3795   unsigned Opc = N->getOpcode();
3796   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3797   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3798 
3799   // X / undef -> undef
3800   // X % undef -> undef
3801   // X / 0 -> undef
3802   // X % 0 -> undef
3803   // NOTE: This includes vectors where any divisor element is zero/undef.
3804   if (DAG.isUndef(Opc, {N0, N1}))
3805     return DAG.getUNDEF(VT);
3806 
3807   // undef / X -> 0
3808   // undef % X -> 0
3809   if (N0.isUndef())
3810     return DAG.getConstant(0, DL, VT);
3811 
3812   // 0 / X -> 0
3813   // 0 % X -> 0
3814   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3815   if (N0C && N0C->isNullValue())
3816     return N0;
3817 
3818   // X / X -> 1
3819   // X % X -> 0
3820   if (N0 == N1)
3821     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3822 
3823   // X / 1 -> X
3824   // X % 1 -> 0
3825   // If this is a boolean op (single-bit element type), we can't have
3826   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3827   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3828   // it's a 1.
3829   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3830     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3831 
3832   return SDValue();
3833 }
3834 
/// Visitor for ISD::SDIV: constant folds, strength-reduces to UDIV when sign
/// bits are known zero, tries the visitSDIVLike expansions, and finally
/// attempts to merge with a matching SREM into an SDIVREM.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
    return C;

  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);

  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
  // Only X == MIN_SIGNED divides to 1; every other dividend has smaller
  // magnitude than the divisor and divides to 0.
  if (N1C && N1C->getAPIntValue().isMinSignedValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  if (SDValue V = visitSDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
3898 
/// Shared SDIV expansion logic used by both visitSDIV and visitREM.
/// Expands (sdiv N0, pow2-or-negated-pow2-constant) into shifts/selects, or
/// falls back to the target-independent BuildSDIV multiply-by-magic-constant
/// sequence for other constant divisors.  Returns an empty SDValue if no
/// expansion applies.
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);
  unsigned BitWidth = VT.getScalarSizeInBits();

  // Helper for determining whether a value is a power-2 constant scalar or a
  // vector of such elements.  Negated powers of two are accepted too.
  auto IsPowerOfTwo = [](ConstantSDNode *C) {
    if (C->isNullValue() || C->isOpaque())
      return false;
    if (C->getAPIntValue().isPowerOf2())
      return true;
    if ((-C->getAPIntValue()).isPowerOf2())
      return true;
    return false;
  };

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // Create constants that are functions of the shift amount value.
    // CTTZ(N1) == log2(|N1|) for both pow2 and negated-pow2 divisors, since
    // negation in two's complement preserves the trailing-zero count.
    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
    if (!isConstantOrConstantVector(Inexact))
      return SDValue();

    // Splat the sign bit into the register
    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
    AddToWorklist(Sign.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0; this biases negative dividends so the
    // arithmetic shift below rounds toward zero instead of toward -inf.
    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
    AddToWorklist(Srl.getNode());
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
    AddToWorklist(Add.getNode());
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
    AddToWorklist(Sra.getNode());

    // Special case: (sdiv X, 1) -> X
    // Special Case: (sdiv X, -1) -> 0-X
    SDValue One = DAG.getConstant(1, DL, VT);
    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
    SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
    SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
    Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);

    // If dividing by a positive value, we're done. Otherwise, the result must
    // be negated.
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);

    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
    SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
    return Res;
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  return SDValue();
}
3979 
/// Visitor for ISD::UDIV: constant folds, tries the visitUDIVLike expansions,
/// and finally attempts to merge with a matching UREM into a UDIVREM.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
    return C;

  // fold (udiv X, -1) -> select(X == -1, 1, 0)
  // -1 is the maximum unsigned value; only X == -1 divides to 1.
  if (N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (SDValue V = visitUDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
4034 
/// Shared UDIV expansion logic used by both visitUDIV and visitREM.
/// Expands division by a power of two (or a shifted power of two) into a
/// right shift, or falls back to the BuildUDIV magic-number sequence for
/// other constant divisors.  Returns an empty SDValue if no expansion
/// applies.
SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      // The shift amount type of the SHL is reused for the ADD below.
      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate multiply-based sequence, but only when
  // real division is expensive for this target/function.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  return SDValue();
}
4077 
/// Visitor for both ISD::SREM and ISD::UREM.  Constant folds, strength
/// reduces (urem by pow2 -> and, srem with zero sign bits -> urem), rewrites
/// X % C as X - (X/C)*C when the division can be optimized, and finally
/// attempts a DIVREM merge.
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
    return C;

  // fold (urem X, -1) -> select(X == -1, 0, x)
  // -1 is the maximum unsigned value, so only X == -1 leaves remainder 0.
  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(0, DL, VT), N0);

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
  // speculative DIV must not cause a DIVREM conversion.  We guard against this
  // by skipping the simplification if isIntDivCheap().  When div is not cheap,
  // combine will not return a DIVREM.  Regardless, checking cheapness here
  // makes sense since the simplification results in fatter code.
  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
    SDValue OptimizedDiv =
        isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
    if (OptimizedDiv.getNode()) {
      // If the equivalent Div node also exists, update its users.
      unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
      if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
                                                { N0, N1 }))
        CombineTo(DivNode, OptimizedDiv);
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(OptimizedDiv.getNode());
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // srem -> sdivrem / urem -> udivrem; take the remainder result (value 1).
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}
4159 
/// Visitor for ISD::MULHS (signed multiply returning the high half).
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhs x, 0) -> 0
    // do not return N0/N1, because undef node may exist.
    if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
        ISD::isBuildVectorAllZeros(N1.getNode()))
      return DAG.getConstant(0, DL, VT);
  }

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  // The high half of x*1 is the sign extension of x.
  if (isOneConstant(N1))
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (mulhs x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
  if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      // Shift the high half of the wide product down, then truncate.
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
4206 
/// Visitor for ISD::MULHU (unsigned multiply returning the high half).
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhu x, 0) -> 0
    // do not return N0/N1, because undef node may exist.
    if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
        ISD::isBuildVectorAllZeros(N1.getNode()))
      return DAG.getConstant(0, DL, VT);
  }

  // fold (mulhu x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhu x, 1) -> 0
  // The high half of an unsigned x*1 is always zero.
  if (isOneConstant(N1))
    return DAG.getConstant(0, DL, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
    unsigned NumEltBits = VT.getScalarSizeInBits();
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    SDValue SRLAmt = DAG.getNode(
        ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      // Shift the high half of the wide product down, then truncate.
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
4262 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// the combined value if a simplification was made, or an empty SDValue if
/// both results are still needed as-is.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  // Speculatively build the single-result node, run combine() on it, and keep
  // the result only if it actually simplified to a different (legal) node.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    // NOTE(review): this compares SDValues (node + result number) while the
    // Lo path above compares raw nodes — presumably equivalent here since
    // both are result 0; confirm before unifying.
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
4311 
/// Visitor for ISD::SMUL_LOHI (signed multiply producing low and high halves).
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the smul_lohi to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Extract the high half of the wide product (result 1).
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Truncate the wide product for the low half (result 0).
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
4342 
/// Visitor for ISD::UMUL_LOHI (unsigned multiply producing low and high
/// halves).
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // (umul_lohi N0, 0) -> (0, 0)
  if (isNullConstant(N->getOperand(1))) {
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return CombineTo(N, Zero, Zero);
  }

  // (umul_lohi N0, 1) -> (N0, 0)
  if (isOneConstant(N->getOperand(1))) {
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return CombineTo(N, N->getOperand(0), Zero);
  }

  // If the type twice as wide is legal, transform the umul_lohi to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Extract the high half of the wide product (result 1).
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Truncate the wide product for the low half (result 0).
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
4385 
4386 SDValue DAGCombiner::visitMULO(SDNode *N) {
4387   SDValue N0 = N->getOperand(0);
4388   SDValue N1 = N->getOperand(1);
4389   EVT VT = N0.getValueType();
4390   bool IsSigned = (ISD::SMULO == N->getOpcode());
4391 
4392   EVT CarryVT = N->getValueType(1);
4393   SDLoc DL(N);
4394 
4395   // canonicalize constant to RHS.
4396   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4397       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4398     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4399 
4400   // fold (mulo x, 0) -> 0 + no carry out
4401   if (isNullOrNullSplat(N1))
4402     return CombineTo(N, DAG.getConstant(0, DL, VT),
4403                      DAG.getConstant(0, DL, CarryVT));
4404 
4405   // (mulo x, 2) -> (addo x, x)
4406   if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
4407     if (C2->getAPIntValue() == 2)
4408       return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4409                          N->getVTList(), N0, N0);
4410 
4411   return SDValue();
4412 }
4413 
/// Visitor for the integer min/max opcodes (ISD::SMIN/SMAX/UMIN/UMAX).
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned Opcode = N->getOpcode();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold operation with constant operands.
  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  // If sign bits of both operands are zero, flip between UMIN/UMAX and
  // SMIN/SMAX (signed and unsigned compares agree on non-negative values).
  // Only do this if the current op isn't legal and the flipped is.
  if (!TLI.isOperationLegal(Opcode, VT) &&
      (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
      (N1.isUndef() || DAG.SignBitIsZero(N1))) {
    unsigned AltOpcode;
    switch (Opcode) {
    case ISD::SMIN: AltOpcode = ISD::UMIN; break;
    case ISD::SMAX: AltOpcode = ISD::UMAX; break;
    case ISD::UMIN: AltOpcode = ISD::SMIN; break;
    case ISD::UMAX: AltOpcode = ISD::SMAX; break;
    default: llvm_unreachable("Unknown MINMAX opcode");
    }
    if (TLI.isOperationLegal(AltOpcode, VT))
      return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
  }

  return SDValue();
}
4453 
4454 /// If this is a bitwise logic instruction and both operands have the same
4455 /// opcode, try to sink the other opcode after the logic instruction.
4456 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4457   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4458   EVT VT = N0.getValueType();
4459   unsigned LogicOpcode = N->getOpcode();
4460   unsigned HandOpcode = N0.getOpcode();
4461   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4462           LogicOpcode == ISD::XOR) && "Expected logic opcode");
4463   assert(HandOpcode == N1.getOpcode() && "Bad input!");
4464 
4465   // Bail early if none of these transforms apply.
4466   if (N0.getNumOperands() == 0)
4467     return SDValue();
4468 
4469   // FIXME: We should check number of uses of the operands to not increase
4470   //        the instruction count for all transforms.
4471 
4472   // Handle size-changing casts.
4473   SDValue X = N0.getOperand(0);
4474   SDValue Y = N1.getOperand(0);
4475   EVT XVT = X.getValueType();
4476   SDLoc DL(N);
4477   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4478       HandOpcode == ISD::SIGN_EXTEND) {
4479     // If both operands have other uses, this transform would create extra
4480     // instructions without eliminating anything.
4481     if (!N0.hasOneUse() && !N1.hasOneUse())
4482       return SDValue();
4483     // We need matching integer source types.
4484     if (XVT != Y.getValueType())
4485       return SDValue();
4486     // Don't create an illegal op during or after legalization. Don't ever
4487     // create an unsupported vector op.
4488     if ((VT.isVector() || LegalOperations) &&
4489         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4490       return SDValue();
4491     // Avoid infinite looping with PromoteIntBinOp.
4492     // TODO: Should we apply desirable/legal constraints to all opcodes?
4493     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4494         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4495       return SDValue();
4496     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4497     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4498     return DAG.getNode(HandOpcode, DL, VT, Logic);
4499   }
4500 
4501   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4502   if (HandOpcode == ISD::TRUNCATE) {
4503     // If both operands have other uses, this transform would create extra
4504     // instructions without eliminating anything.
4505     if (!N0.hasOneUse() && !N1.hasOneUse())
4506       return SDValue();
4507     // We need matching source types.
4508     if (XVT != Y.getValueType())
4509       return SDValue();
4510     // Don't create an illegal op during or after legalization.
4511     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4512       return SDValue();
4513     // Be extra careful sinking truncate. If it's free, there's no benefit in
4514     // widening a binop. Also, don't create a logic op on an illegal type.
4515     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4516       return SDValue();
4517     if (!TLI.isTypeLegal(XVT))
4518       return SDValue();
4519     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4520     return DAG.getNode(HandOpcode, DL, VT, Logic);
4521   }
4522 
4523   // For binops SHL/SRL/SRA/AND:
4524   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4525   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4526        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4527       N0.getOperand(1) == N1.getOperand(1)) {
4528     // If either operand has other uses, this transform is not an improvement.
4529     if (!N0.hasOneUse() || !N1.hasOneUse())
4530       return SDValue();
4531     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4532     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4533   }
4534 
4535   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4536   if (HandOpcode == ISD::BSWAP) {
4537     // If either operand has other uses, this transform is not an improvement.
4538     if (!N0.hasOneUse() || !N1.hasOneUse())
4539       return SDValue();
4540     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4541     return DAG.getNode(HandOpcode, DL, VT, Logic);
4542   }
4543 
4544   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4545   // Only perform this optimization up until type legalization, before
4546   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
4547   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4548   // we don't want to undo this promotion.
4549   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4550   // on scalars.
4551   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4552        Level <= AfterLegalizeTypes) {
4553     // Input types must be integer and the same.
4554     if (XVT.isInteger() && XVT == Y.getValueType() &&
4555         !(VT.isVector() && TLI.isTypeLegal(VT) &&
4556           !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4557       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4558       return DAG.getNode(HandOpcode, DL, VT, Logic);
4559     }
4560   }
4561 
4562   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4563   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4564   // If both shuffles use the same mask, and both shuffle within a single
4565   // vector, then it is worthwhile to move the swizzle after the operation.
4566   // The type-legalizer generates this pattern when loading illegal
4567   // vector types from memory. In many cases this allows additional shuffle
4568   // optimizations.
4569   // There are other cases where moving the shuffle after the xor/and/or
4570   // is profitable even if shuffles don't perform a swizzle.
4571   // If both shuffles use the same mask, and both shuffles have the same first
4572   // or second operand, then it might still be profitable to move the shuffle
4573   // after the xor/and/or operation.
4574   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4575     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4576     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4577     assert(X.getValueType() == Y.getValueType() &&
4578            "Inputs to shuffles are not the same type");
4579 
4580     // Check that both shuffles use the same mask. The masks are known to be of
4581     // the same length because the result vector type is the same.
4582     // Check also that shuffles have only one use to avoid introducing extra
4583     // instructions.
4584     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4585         !SVN0->getMask().equals(SVN1->getMask()))
4586       return SDValue();
4587 
4588     // Don't try to fold this node if it requires introducing a
4589     // build vector of all zeros that might be illegal at this stage.
4590     SDValue ShOp = N0.getOperand(1);
4591     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4592       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4593 
4594     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4595     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4596       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4597                                   N0.getOperand(0), N1.getOperand(0));
4598       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4599     }
4600 
4601     // Don't try to fold this node if it requires introducing a
4602     // build vector of all zeros that might be illegal at this stage.
4603     ShOp = N0.getOperand(0);
4604     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4605       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4606 
4607     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4608     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4609       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4610                                   N1.getOperand(1));
4611       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4612     }
4613   }
4614 
4615   return SDValue();
4616 }
4617 
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
/// \p IsAnd selects the logic op: true for AND, false for OR. \p N0 and \p N1
/// are the two operands of that logic op; both must be setcc-equivalent for
/// any fold to fire. Returns the replacement value or an empty SDValue.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  // Decompose both operands into (LHS, RHS, condition code).
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT.getScalarType() != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Both compares use the same predicate against the same RHS value: try the
  // family of folds that combine the two LHS values with a single or/and.
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullOrNullSplat(LR);
    bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  // The width check excludes i1 where "X != 0 && X != -1" is unsatisfiable.
  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
      IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }

    // Turn compare of constants whose difference is 1 bit into add+and+setcc.
    // TODO - support non-uniform vector amounts.
    if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
      // Match a shared variable operand and 2 non-opaque constant operands.
      ConstantSDNode *C0 = isConstOrConstSplat(LR);
      ConstantSDNode *C1 = isConstOrConstSplat(RR);
      if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
        // Canonicalize larger constant as C0.
        if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
          std::swap(C0, C1);

        // The difference of the constants must be a single bit.
        const APInt &C0Val = C0->getAPIntValue();
        const APInt &C1Val = C1->getAPIntValue();
        if ((C0Val - C1Val).isPowerOf2()) {
          // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
          // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
          SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
          SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
          SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
          SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
          SDValue Zero = DAG.getConstant(0, DL, OpVT);
          return DAG.getSetCC(DL, VT, And, Zero, CC0);
        }
      }
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
                                : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
4764 
4765 /// This contains all DAGCombine rules which reduce two values combined by
4766 /// an And operation to a single value. This makes them reusable in the context
4767 /// of visitSELECT(). Rules involving constants are not included as
4768 /// visitSELECT() already handles those cases.
4769 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4770   EVT VT = N1.getValueType();
4771   SDLoc DL(N);
4772 
4773   // fold (and x, undef) -> 0
4774   if (N0.isUndef() || N1.isUndef())
4775     return DAG.getConstant(0, DL, VT);
4776 
4777   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4778     return V;
4779 
4780   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4781       VT.getSizeInBits() <= 64) {
4782     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4783       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4784         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4785         // immediate for an add, but it is legal if its top c2 bits are set,
4786         // transform the ADD so the immediate doesn't need to be materialized
4787         // in a register.
4788         APInt ADDC = ADDI->getAPIntValue();
4789         APInt SRLC = SRLI->getAPIntValue();
4790         if (ADDC.getMinSignedBits() <= 64 &&
4791             SRLC.ult(VT.getSizeInBits()) &&
4792             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4793           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4794                                              SRLC.getZExtValue());
4795           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4796             ADDC |= Mask;
4797             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4798               SDLoc DL0(N0);
4799               SDValue NewAdd =
4800                 DAG.getNode(ISD::ADD, DL0, VT,
4801                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4802               CombineTo(N0.getNode(), NewAdd);
4803               // Return N so it doesn't get rechecked!
4804               return SDValue(N, 0);
4805             }
4806           }
4807         }
4808       }
4809     }
4810   }
4811 
4812   // Reduce bit extract of low half of an integer to the narrower type.
4813   // (and (srl i64:x, K), KMask) ->
4814   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4815   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4816     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4817       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4818         unsigned Size = VT.getSizeInBits();
4819         const APInt &AndMask = CAnd->getAPIntValue();
4820         unsigned ShiftBits = CShift->getZExtValue();
4821 
4822         // Bail out, this node will probably disappear anyway.
4823         if (ShiftBits == 0)
4824           return SDValue();
4825 
4826         unsigned MaskBits = AndMask.countTrailingOnes();
4827         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4828 
4829         if (AndMask.isMask() &&
4830             // Required bits must not span the two halves of the integer and
4831             // must fit in the half size type.
4832             (ShiftBits + MaskBits <= Size / 2) &&
4833             TLI.isNarrowingProfitable(VT, HalfVT) &&
4834             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4835             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4836             TLI.isTruncateFree(VT, HalfVT) &&
4837             TLI.isZExtFree(HalfVT, VT)) {
4838           // The isNarrowingProfitable is to avoid regressions on PPC and
4839           // AArch64 which match a few 64-bit bit insert / bit extract patterns
4840           // on downstream users of this. Those patterns could probably be
4841           // extended to handle extensions mixed in.
4842 
4843           SDValue SL(N0);
4844           assert(MaskBits <= Size);
4845 
4846           // Extracting the highest bit of the low half.
4847           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4848           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4849                                       N0.getOperand(0));
4850 
4851           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4852           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4853           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4854           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4855           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4856         }
4857       }
4858     }
4859   }
4860 
4861   return SDValue();
4862 }
4863 
4864 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4865                                    EVT LoadResultTy, EVT &ExtVT) {
4866   if (!AndC->getAPIntValue().isMask())
4867     return false;
4868 
4869   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4870 
4871   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4872   EVT LoadedVT = LoadN->getMemoryVT();
4873 
4874   if (ExtVT == LoadedVT &&
4875       (!LegalOperations ||
4876        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4877     // ZEXTLOAD will match without needing to change the size of the value being
4878     // loaded.
4879     return true;
4880   }
4881 
4882   // Do not change the width of a volatile or atomic loads.
4883   if (!LoadN->isSimple())
4884     return false;
4885 
4886   // Do not generate loads of non-round integer types since these can
4887   // be expensive (and would be wrong if the type is not byte sized).
4888   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4889     return false;
4890 
4891   if (LegalOperations &&
4892       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4893     return false;
4894 
4895   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4896     return false;
4897 
4898   return true;
4899 }
4900 
4901 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4902                                     ISD::LoadExtType ExtType, EVT &MemVT,
4903                                     unsigned ShAmt) {
4904   if (!LDST)
4905     return false;
4906   // Only allow byte offsets.
4907   if (ShAmt % 8)
4908     return false;
4909 
4910   // Do not generate loads of non-round integer types since these can
4911   // be expensive (and would be wrong if the type is not byte sized).
4912   if (!MemVT.isRound())
4913     return false;
4914 
4915   // Don't change the width of a volatile or atomic loads.
4916   if (!LDST->isSimple())
4917     return false;
4918 
4919   // Verify that we are actually reducing a load width here.
4920   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4921     return false;
4922 
4923   // Ensure that this isn't going to produce an unsupported memory access.
4924   if (ShAmt) {
4925     assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
4926     const unsigned ByteShAmt = ShAmt / 8;
4927     const Align LDSTAlign = LDST->getAlign();
4928     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
4929     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4930                                 LDST->getAddressSpace(), NarrowAlign,
4931                                 LDST->getMemOperand()->getFlags()))
4932       return false;
4933   }
4934 
4935   // It's not possible to generate a constant of extended or untyped type.
4936   EVT PtrType = LDST->getBasePtr().getValueType();
4937   if (PtrType == MVT::Untyped || PtrType.isExtended())
4938     return false;
4939 
4940   if (isa<LoadSDNode>(LDST)) {
4941     LoadSDNode *Load = cast<LoadSDNode>(LDST);
4942     // Don't transform one with multiple uses, this would require adding a new
4943     // load.
4944     if (!SDValue(Load, 0).hasOneUse())
4945       return false;
4946 
4947     if (LegalOperations &&
4948         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4949       return false;
4950 
4951     // For the transform to be legal, the load must produce only two values
4952     // (the value loaded and the chain).  Don't transform a pre-increment
4953     // load, for example, which produces an extra value.  Otherwise the
4954     // transformation is not equivalent, and the downstream logic to replace
4955     // uses gets things wrong.
4956     if (Load->getNumValues() > 2)
4957       return false;
4958 
4959     // If the load that we're shrinking is an extload and we're not just
4960     // discarding the extension we can't simply shrink the load. Bail.
4961     // TODO: It would be possible to merge the extensions in some cases.
4962     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4963         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4964       return false;
4965 
4966     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4967       return false;
4968   } else {
4969     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4970     StoreSDNode *Store = cast<StoreSDNode>(LDST);
4971     // Can't write outside the original store
4972     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4973       return false;
4974 
4975     if (LegalOperations &&
4976         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4977       return false;
4978   }
4979   return true;
4980 }
4981 
/// Recursively walk the operands of \p N (a tree of and/or/xor, zero-extends
/// and loads) looking for loads that can be narrowed to the width implied by
/// \p Mask (a low-bits mask). Narrowable loads are collected in \p Loads;
/// or/xor nodes whose constant operand has bits outside the mask are recorded
/// in \p NodesWithConsts (their constants must be re-masked later). At most
/// one other node is tolerated, returned via \p NodeToMask, and will be
/// explicitly masked by the caller. Returns false if any operand rules out
/// the transform.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallVectorImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (SDValue Op : N->op_values()) {
    // Vector operations are not handled by this transform.
    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      // For or/xor, constant bits outside the mask would change the masked
      // result, so remember that this node's constant needs re-masking.
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    // Every interior value must have a single use, or narrowing would
    // require duplicating work for the other users.
    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.push_back(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      // For AssertZext the known-zero-extended width is the asserted VT;
      // for ZERO_EXTEND it is the pre-extension source type.
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      // Otherwise fall through: this extend must become the masked node.
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      // Recurse into nested logic ops.
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will be masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        // Glue and chain results don't count as data results.
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}
5070 
/// Given \p N, an AND of a value with a constant low-bits mask, try to push
/// the mask backwards through a tree of logic ops so that the loads feeding
/// it can be narrowed, making the AND itself redundant. Returns true and
/// rewrites the DAG on success.
bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask)
    return false;

  // Only a contiguous low-bits mask can be folded into narrow zext-loads.
  if (!Mask->getAPIntValue().isMask())
    return false;

  // No need to do anything if the and directly uses a load.
  if (isa<LoadSDNode>(N->getOperand(0)))
    return false;

  SmallVector<LoadSDNode*, 8> Loads;
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
  SDNode *FixupNode = nullptr;
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    // Without at least one narrowable load there is nothing to gain.
    if (Loads.size() == 0)
      return false;

    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);

    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      // The RAUW above also rewired the new AND's own operand onto itself.
      // If getNode actually built a fresh AND (rather than CSE-ing onto an
      // existing node), point its operand back at the original value.
      if (And.getOpcode() == ISD ::AND)
        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
    }

    // Narrow any constants that need it.
    for (auto *LogicN : NodesWithConsts) {
      SDValue Op0 = LogicN->getOperand(0);
      SDValue Op1 = LogicN->getOperand(1);

      // Canonicalize so the constant (re-masked below) is Op1.
      if (isa<ConstantSDNode>(Op0))
          std::swap(Op0, Op1);

      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
                                Op1, MaskOp);

      DAG.UpdateNodeOperands(LogicN, Op0, And);
    }

    // Create narrow loads.
    for (auto *Load : Loads) {
      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      // Same self-rewiring fixup as for FixupNode above.
      if (And.getOpcode() == ISD ::AND)
        And = SDValue(
            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
      // ReduceLoadWidth is expected to succeed because isLegalNarrowLdSt was
      // already checked in SearchForAndLoads.
      SDValue NewLoad = ReduceLoadWidth(And.getNode());
      assert(NewLoad &&
             "Shouldn't be masking the load if it can't be narrowed");
      CombineTo(Load, NewLoad, NewLoad.getValue(1));
    }
    // The original AND is now redundant: replace it with its masked operand.
    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    return true;
  }
  return false;
}
5138 
5139 // Unfold
5140 //    x &  (-1 'logical shift' y)
5141 // To
5142 //    (x 'opposite logical shift' y) 'logical shift' y
5143 // if it is better for performance.
5144 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5145   assert(N->getOpcode() == ISD::AND);
5146 
5147   SDValue N0 = N->getOperand(0);
5148   SDValue N1 = N->getOperand(1);
5149 
5150   // Do we actually prefer shifts over mask?
5151   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5152     return SDValue();
5153 
5154   // Try to match  (-1 '[outer] logical shift' y)
5155   unsigned OuterShift;
5156   unsigned InnerShift; // The opposite direction to the OuterShift.
5157   SDValue Y;           // Shift amount.
5158   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5159     if (!M.hasOneUse())
5160       return false;
5161     OuterShift = M->getOpcode();
5162     if (OuterShift == ISD::SHL)
5163       InnerShift = ISD::SRL;
5164     else if (OuterShift == ISD::SRL)
5165       InnerShift = ISD::SHL;
5166     else
5167       return false;
5168     if (!isAllOnesConstant(M->getOperand(0)))
5169       return false;
5170     Y = M->getOperand(1);
5171     return true;
5172   };
5173 
5174   SDValue X;
5175   if (matchMask(N1))
5176     X = N0;
5177   else if (matchMask(N0))
5178     X = N1;
5179   else
5180     return SDValue();
5181 
5182   SDLoc DL(N);
5183   EVT VT = N->getValueType(0);
5184 
5185   //     tmp = x   'opposite logical shift' y
5186   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5187   //     ret = tmp 'logical shift' y
5188   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5189 
5190   return T1;
5191 }
5192 
5193 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5194 /// For a target with a bit test, this is expected to become test + set and save
5195 /// at least 1 instruction.
5196 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5197   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5198 
5199   // This is probably not worthwhile without a supported type.
5200   EVT VT = And->getValueType(0);
5201   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5202   if (!TLI.isTypeLegal(VT))
5203     return SDValue();
5204 
5205   // Look through an optional extension and find a 'not'.
5206   // TODO: Should we favor test+set even without the 'not' op?
5207   SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5208   if (Not.getOpcode() == ISD::ANY_EXTEND)
5209     Not = Not.getOperand(0);
5210   if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5211     return SDValue();
5212 
5213   // Look though an optional truncation. The source operand may not be the same
5214   // type as the original 'and', but that is ok because we are masking off
5215   // everything but the low bit.
5216   SDValue Srl = Not.getOperand(0);
5217   if (Srl.getOpcode() == ISD::TRUNCATE)
5218     Srl = Srl.getOperand(0);
5219 
5220   // Match a shift-right by constant.
5221   if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5222       !isa<ConstantSDNode>(Srl.getOperand(1)))
5223     return SDValue();
5224 
5225   // We might have looked through casts that make this transform invalid.
5226   // TODO: If the source type is wider than the result type, do the mask and
5227   //       compare in the source type.
5228   const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5229   unsigned VTBitWidth = VT.getSizeInBits();
5230   if (ShiftAmt.uge(VTBitWidth))
5231     return SDValue();
5232 
5233   // Turn this into a bit-test pattern using mask op + setcc:
5234   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5235   SDLoc DL(And);
5236   SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5237   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5238   SDValue Mask = DAG.getConstant(
5239       APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5240   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5241   SDValue Zero = DAG.getConstant(0, DL, VT);
5242   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5243   return DAG.getZExtOrTrunc(Setcc, DL, VT);
5244 }
5245 
/// Combine an ISD::AND node: constant folding, mask simplification via known
/// bits, bswap matching, and conversion of masked (ext)loads into
/// zero-extending loads when the mask makes the AND redundant.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x & x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
                             SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
                             SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;
  }

  // fold (and c1, c2) -> c1&c2
  // N1C is the RHS constant (scalar or splat), if any; several folds below
  // reuse it.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);

  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;

  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate and
  if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
    return RAND;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (VT.isVector())
    if (SDValue Shuffle = XformToShuffleWithZero(N))
      return Shuffle;

  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
    return Combined;

  // fold (and (or x, C), D) -> D if (C & D) == D
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::OR &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things.  This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (EltBitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
               SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if ((SplatBitSize % EltBitWidth) == 0) {
          Constant = APInt::getAllOnesValue(EltBitWidth);
          for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
            Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    // B: the AND can be removed when the mask covers every loaded bit. For an
    // EXTLOAD that additionally requires converting to a ZEXTLOAD, which must
    // be profitable; ZEXTLOAD/NON_EXTLOAD already guarantee zeroed high bits.
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    if (SDValue Res = ReduceLoadWidth(N)) {
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
      AddToWorklist(N);
      // Redirect all users of the original load to the narrowed load.
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
      return SDValue(N, 0);
    }
  }

  if (LegalTypes) {
    // Attempt to propagate the AND back up to the leaves which, if they're
    // loads, can be combined to narrow loads and the AND node can be removed.
    // Perform after legalization so that extend nodes will already be
    // combined into the loads.
    if (BackwardsPropagateMask(N))
      return SDValue(N, 0);
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    if (isNullOrNullSplat(N0.getOperand(0))) {
      SDValue SubRHS = N0.getOperand(1);
      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return SubRHS;
      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
      (ISD::isEXTLoad(N0.getNode()) ||
       (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned ExtBitSize = N1.getScalarValueSizeInBits();
    unsigned MemBitSize = MemVT.getScalarSizeInBits();
    APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
    if (DAG.MaskedValueIsZero(N1, ExtBits) &&
        ((!LegalOperations && LN0->isSimple()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad =
          DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
                         LN0->getBasePtr(), MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
    return Shifts;

  // If the target has a native bit-test instruction, try to form one from
  // and (not (srl X, C)), 1.
  if (TLI.hasBitTest(N0, N1))
    if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
      return V;

  return SDValue();
}
5539 
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// Looks through optional AND masks (0xFF/0xFF00/0xFFFF) on either side of
/// the OR and on the shift sources; if DemandHighBits is set, the bits above
/// the low halfword must be proven zero for the match to be valid.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  // Canonicalize so that the SHL half ends up in N0 and the SRL half in N1.
  // LookPassAnd0/1 record that the corresponding AND mask was already
  // consumed, so it is not searched for again below the shifts.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
      std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
      std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    // This is needed for X86.
    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
                  N01C->getZExtValue() != 0xFFFF))
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After looking through the masks we must have a SHL on the left and an
  // SRL on the right (swap once more if they are reversed).
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  // Both shift amounts must be constant 8.
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    // Also allow 0xFFFF since the bits will be shifted out. This is needed
    // for X86.
    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
                   N101C->getZExtValue() != 0xFFFF))
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must byte-swap the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Build (bswap a), then shift the swapped halfword down into the low bits
  // for types wider than i16.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
5652 
5653 /// Return true if the specified node is an element that makes up a 32-bit
5654 /// packed halfword byteswap.
5655 /// ((x & 0x000000ff) << 8) |
5656 /// ((x & 0x0000ff00) >> 8) |
5657 /// ((x & 0x00ff0000) << 8) |
5658 /// ((x & 0xff000000) >> 8)
5659 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5660   if (!N.getNode()->hasOneUse())
5661     return false;
5662 
5663   unsigned Opc = N.getOpcode();
5664   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5665     return false;
5666 
5667   SDValue N0 = N.getOperand(0);
5668   unsigned Opc0 = N0.getOpcode();
5669   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5670     return false;
5671 
5672   ConstantSDNode *N1C = nullptr;
5673   // SHL or SRL: look upstream for AND mask operand
5674   if (Opc == ISD::AND)
5675     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5676   else if (Opc0 == ISD::AND)
5677     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5678   if (!N1C)
5679     return false;
5680 
5681   unsigned MaskByteOffset;
5682   switch (N1C->getZExtValue()) {
5683   default:
5684     return false;
5685   case 0xFF:       MaskByteOffset = 0; break;
5686   case 0xFF00:     MaskByteOffset = 1; break;
5687   case 0xFFFF:
5688     // In case demanded bits didn't clear the bits that will be shifted out.
5689     // This is needed for X86.
5690     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5691       MaskByteOffset = 1;
5692       break;
5693     }
5694     return false;
5695   case 0xFF0000:   MaskByteOffset = 2; break;
5696   case 0xFF000000: MaskByteOffset = 3; break;
5697   }
5698 
5699   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5700   if (Opc == ISD::AND) {
5701     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5702       // (x >> 8) & 0xff
5703       // (x >> 8) & 0xff0000
5704       if (Opc0 != ISD::SRL)
5705         return false;
5706       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5707       if (!C || C->getZExtValue() != 8)
5708         return false;
5709     } else {
5710       // (x << 8) & 0xff00
5711       // (x << 8) & 0xff000000
5712       if (Opc0 != ISD::SHL)
5713         return false;
5714       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5715       if (!C || C->getZExtValue() != 8)
5716         return false;
5717     }
5718   } else if (Opc == ISD::SHL) {
5719     // (x & 0xff) << 8
5720     // (x & 0xff0000) << 8
5721     if (MaskByteOffset != 0 && MaskByteOffset != 2)
5722       return false;
5723     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5724     if (!C || C->getZExtValue() != 8)
5725       return false;
5726   } else { // Opc == ISD::SRL
5727     // (x & 0xff00) >> 8
5728     // (x & 0xff000000) >> 8
5729     if (MaskByteOffset != 1 && MaskByteOffset != 3)
5730       return false;
5731     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5732     if (!C || C->getZExtValue() != 8)
5733       return false;
5734   }
5735 
5736   if (Parts[MaskByteOffset])
5737     return false;
5738 
5739   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5740   return true;
5741 }
5742 
5743 // Match 2 elements of a packed halfword bswap.
5744 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
5745   if (N.getOpcode() == ISD::OR)
5746     return isBSwapHWordElement(N.getOperand(0), Parts) &&
5747            isBSwapHWordElement(N.getOperand(1), Parts);
5748 
5749   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
5750     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
5751     if (!C || C->getAPIntValue() != 16)
5752       return false;
5753     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
5754     return true;
5755   }
5756 
5757   return false;
5758 }
5759 
5760 // Match this pattern:
5761 //   (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
5762 // And rewrite this to:
5763 //   (rotr (bswap A), 16)
5764 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
5765                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
5766                                        SDValue N1, EVT VT, EVT ShiftAmountTy) {
5767   assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
5768          "MatchBSwapHWordOrAndAnd: expecting i32");
5769   if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5770     return SDValue();
5771   if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
5772     return SDValue();
5773   // TODO: this is too restrictive; lifting this restriction requires more tests
5774   if (!N0->hasOneUse() || !N1->hasOneUse())
5775     return SDValue();
5776   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
5777   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
5778   if (!Mask0 || !Mask1)
5779     return SDValue();
5780   if (Mask0->getAPIntValue() != 0xff00ff00 ||
5781       Mask1->getAPIntValue() != 0x00ff00ff)
5782     return SDValue();
5783   SDValue Shift0 = N0.getOperand(0);
5784   SDValue Shift1 = N1.getOperand(0);
5785   if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
5786     return SDValue();
5787   ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
5788   ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
5789   if (!ShiftAmt0 || !ShiftAmt1)
5790     return SDValue();
5791   if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
5792     return SDValue();
5793   if (Shift0.getOperand(0) != Shift1.getOperand(0))
5794     return SDValue();
5795 
5796   SDLoc DL(N);
5797   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
5798   SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
5799   return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5800 }
5801 
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // First try the two-AND form (shl/srl by 8 with 0xff00ff00/0x00ff00ff
  // masks), which rewrites directly to (rotr (bswap x), 16).
  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
                                              getShiftAmountTy(VT)))
  return BSwap;

  // Try again with commuted operands.
  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
                                              getShiftAmountTy(VT)))
  return BSwap;


  // Look for either
  // (or (bswaphpair), (bswaphpair))
  // (or (or (bswaphpair), (and)), (and))
  // (or (or (and), (bswaphpair)), (and))
  // NOTE: isBSwapHWordPair/isBSwapHWordElement fill Parts[] as a side effect,
  // so the evaluation order (including short-circuiting) below matters.
  SDNode *Parts[4] = {};

  if (isBSwapHWordPair(N0, Parts)) {
    // (or (or (and), (and)), (or (and), (and)))
    if (!isBSwapHWordPair(N1, Parts))
      return SDValue();
  } else if (N0.getOpcode() == ISD::OR) {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    // One of N00/N01 is the remaining element and the other is the pair.
    if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
        !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
      return SDValue();
  } else
    return SDValue();

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do  (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}
5869 
5870 /// This contains all DAGCombine rules which reduce two values combined by
5871 /// an Or operation to a single value \see visitANDLike().
5872 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5873   EVT VT = N1.getValueType();
5874   SDLoc DL(N);
5875 
5876   // fold (or x, undef) -> -1
5877   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5878     return DAG.getAllOnesConstant(DL, VT);
5879 
5880   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5881     return V;
5882 
5883   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
5884   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5885       // Don't increase # computations.
5886       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5887     // We can only do this xform if we know that bits from X that are set in C2
5888     // but not in C1 are already zero.  Likewise for Y.
5889     if (const ConstantSDNode *N0O1C =
5890         getAsNonOpaqueConstant(N0.getOperand(1))) {
5891       if (const ConstantSDNode *N1O1C =
5892           getAsNonOpaqueConstant(N1.getOperand(1))) {
5893         // We can only do this xform if we know that bits from X that are set in
5894         // C2 but not in C1 are already zero.  Likewise for Y.
5895         const APInt &LHSMask = N0O1C->getAPIntValue();
5896         const APInt &RHSMask = N1O1C->getAPIntValue();
5897 
5898         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5899             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5900           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5901                                   N0.getOperand(0), N1.getOperand(0));
5902           return DAG.getNode(ISD::AND, DL, VT, X,
5903                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
5904         }
5905       }
5906     }
5907   }
5908 
5909   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5910   if (N0.getOpcode() == ISD::AND &&
5911       N1.getOpcode() == ISD::AND &&
5912       N0.getOperand(0) == N1.getOperand(0) &&
5913       // Don't increase # computations.
5914       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5915     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5916                             N0.getOperand(1), N1.getOperand(1));
5917     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5918   }
5919 
5920   return SDValue();
5921 }
5922 
5923 /// OR combines for which the commuted variant will be tried as well.
5924 static SDValue visitORCommutative(
5925     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5926   EVT VT = N0.getValueType();
5927   if (N0.getOpcode() == ISD::AND) {
5928     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
5929     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
5930       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5931 
5932     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5933     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
5934       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
5935   }
5936 
5937   return SDValue();
5938 }
5939 
/// Combine an (or N0, N1) node. Applies, in order: trivial identities,
/// vector-specific folds (including merging two zero-blended shuffles into a
/// single shuffle), constant folding and canonicalization, halfword-bswap
/// recognition, reassociation, rotate/funnel-shift matching, combining loads
/// of adjacent bytes, demanded-bits simplification, and ADD-based combines
/// when the operands provably share no set bits.
/// Returns the replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        // Build a combined mask: every lane must take its value from exactly
        // one of the two shuffles (the other lane being zero), or be undef.
        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the otherside is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          SDValue LegalShuffle =
              TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
                                          Mask, DAG);
          if (LegalShuffle)
            return LegalShuffle;
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  // N1C is also reused below by the "(or x, c) -> c" known-bits fold.
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);

  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Try to recognize a carry "diamond" pattern (see combineCarryDiamond).
  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0 or c1/c2 are undef.
  auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
    return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
    if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
                                                 {N1, N0.getOperand(1)})) {
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
      AddToWorklist(IOR.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    }
  }

  // Try the commutative folds with the operands in both orders.
  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
    return Combined;
  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
    return Combined;

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // See if this is some rotate idiom.
  if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
    return Rot;

  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If OR can be rewritten into ADD, try combines based on ADD.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    if (SDValue Combined = visitADDLike(N))
      return Combined;

  return SDValue();
}
6117 
6118 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6119   if (Op.getOpcode() == ISD::AND &&
6120       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6121     Mask = Op.getOperand(1);
6122     return Op.getOperand(0);
6123   }
6124   return Op;
6125 }
6126 
6127 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
6128 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6129                             SDValue &Mask) {
6130   Op = stripConstantMask(DAG, Op, Mask);
6131   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6132     Shift = Op;
6133     return true;
6134   }
6135   return false;
6136 }
6137 
/// Helper function for visitOR to extract the needed side of a rotate idiom
/// from a shl/srl/mul/udiv.  This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
///   (or (add v v) (shrl v bitwidth-1)):
///     expands (add v v) -> (shl v 1)
///
///   (or (mul v c0) (shrl (mul v c1) c2)):
///     expands (mul v c0) -> (shl (mul v c1) c3)
///
///   (or (udiv v c0) (shl (udiv v c1) c2)):
///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
///
///   (or (shl v c0) (shrl (shl v c1) c2)):
///     expands (shl v c0) -> (shl (shl v c1) c3)
///
///   (or (shrl v c0) (shl (shrl v c1) c2)):
///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
                                     SDValue ExtractFrom, SDValue &Mask,
                                     const SDLoc &DL) {
  assert(OppShift && ExtractFrom && "Empty SDValue");
  assert(
      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
      "Existing shift must be valid as a rotate half");

  // Peel off a constant AND mask so we can look through it; it is returned to
  // the caller via Mask.
  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);

  // Value and Type of the shift.
  SDValue OppShiftLHS = OppShift.getOperand(0);
  EVT ShiftedVT = OppShiftLHS.getValueType();

  // Amount of the existing shift.
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));

  // (add v v) -> (shl v 1)
  // TODO: Should this be a general DAG canonicalization?
  if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
      ExtractFrom.getOpcode() == ISD::ADD &&
      ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
      ExtractFrom.getOperand(0) == OppShiftLHS &&
      OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
    return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
                       DAG.getShiftAmountConstant(1, ShiftedVT, DL));

  // Preconditions:
  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
  //
  // Find opcode of the needed shift to be extracted from (op0 v c0).
  unsigned Opcode = ISD::DELETED_NODE;
  bool IsMulOrDiv = false;
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
  // opcode or its arithmetic (mul or udiv) variant.
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
      return false;
    Opcode = NeededShift;
    return true;
  };
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
  // that the needed shift can be extracted from.
  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    return SDValue();

  // op0 must be the same opcode on both sides, have the same LHS argument,
  // and produce the same value type.
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
      ShiftedVT != ExtractFrom.getValueType())
    return SDValue();

  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
  ConstantSDNode *ExtractFromCst =
      isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these
  // values.
  // Check that we have constant values (the !getAPIntValue() tests also
  // reject zero-valued constants).
  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    return SDValue();

  // Compute the shift amount we need to extract to complete the rotate.
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
  // Reject an existing shift amount larger than the element width.
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    return SDValue();
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);

  // Now try extract the needed shift from the ExtractFrom op and see if the
  // result matches up with the existing shift's LHS op.
  if (IsMulOrDiv) {
    // Op to extract from is a mul or udiv by a constant.
    // Check:
    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
                                                 NeededShiftAmt.getZExtValue());
    APInt ResultAmt;
    APInt Rem;
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    if (Rem != 0 || ResultAmt != OppLHSAmt)
      return SDValue();
  } else {
    // Op to extract from is a shift by a constant.
    // Check:
    //      c2 - (bitwidth(op0 v c0) - c1) == c0
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
                                          ExtractFromAmt.getBitWidth()))
      return SDValue();
  }

  // Return the expanded shift op that should allow a rotate to be formed.
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
  EVT ResVT = ExtractFrom.getValueType();
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}
6268 
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
      unsigned Bits = Log2_64(EltSize);
      // The AND constant, together with bits already known zero in the
      // operand, must cover the low log2(EltSize) bits for the mask to be
      // equivalent to EltSize - 1.
      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
           MaskLoBits))
        Pos = Pos.getOperand(0);
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  //
  // We also need to account for a potential truncation of NegOp1 if the amount
  // has already been legalized to a shift amount type.
  // Width holds the value that must equal EltSize (mod Mask) per the final
  // check below.
  APInt Width;
  if ((Pos == NegOp1) ||
      (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}
6384 
6385 // A subroutine of MatchRotate used once we have found an OR of two opposite
6386 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
6387 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6388 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6389 // Neg with outer conversions stripped away.
6390 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6391                                        SDValue Neg, SDValue InnerPos,
6392                                        SDValue InnerNeg, unsigned PosOpcode,
6393                                        unsigned NegOpcode, const SDLoc &DL) {
6394   // fold (or (shl x, (*ext y)),
6395   //          (srl x, (*ext (sub 32, y)))) ->
6396   //   (rotl x, y) or (rotr x, (sub 32, y))
6397   //
6398   // fold (or (shl x, (*ext (sub 32, y))),
6399   //          (srl x, (*ext y))) ->
6400   //   (rotr x, y) or (rotl x, (sub 32, y))
6401   EVT VT = Shifted.getValueType();
6402   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
6403     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6404     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6405                        HasPos ? Pos : Neg);
6406   }
6407 
6408   return SDValue();
6409 }
6410 
6411 // A subroutine of MatchRotate used once we have found an OR of two opposite
6412 // shifts of N0 + N1.  If Neg == <operand size> - Pos then the OR reduces
6413 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
6414 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6415 // Neg with outer conversions stripped away.
6416 // TODO: Merge with MatchRotatePosNeg.
6417 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
6418                                        SDValue Neg, SDValue InnerPos,
6419                                        SDValue InnerNeg, unsigned PosOpcode,
6420                                        unsigned NegOpcode, const SDLoc &DL) {
6421   EVT VT = N0.getValueType();
6422   unsigned EltBits = VT.getScalarSizeInBits();
6423 
6424   // fold (or (shl x0, (*ext y)),
6425   //          (srl x1, (*ext (sub 32, y)))) ->
6426   //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
6427   //
6428   // fold (or (shl x0, (*ext (sub 32, y))),
6429   //          (srl x1, (*ext y))) ->
6430   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6431   if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
6432     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6433     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
6434                        HasPos ? Pos : Neg);
6435   }
6436 
6437   // Matching the shift+xor cases, we can't easily use the xor'd shift amount
6438   // so for now just use the PosOpcode case if its legal.
6439   // TODO: When can we use the NegOpcode case?
6440   if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
6441     auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
6442       if (Op.getOpcode() != BinOpc)
6443         return false;
6444       ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
6445       return Cst && (Cst->getAPIntValue() == Imm);
6446     };
6447 
6448     // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
6449     //   -> (fshl x0, x1, y)
6450     if (IsBinOpImm(N1, ISD::SRL, 1) &&
6451         IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
6452         InnerPos == InnerNeg.getOperand(0) &&
6453         TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
6454       return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
6455     }
6456 
6457     // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
6458     //   -> (fshr x0, x1, y)
6459     if (IsBinOpImm(N0, ISD::SHL, 1) &&
6460         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6461         InnerNeg == InnerPos.getOperand(0) &&
6462         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6463       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6464     }
6465 
6466     // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
6467     //   -> (fshr x0, x1, y)
6468     // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
6469     if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
6470         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6471         InnerNeg == InnerPos.getOperand(0) &&
6472         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6473       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6474     }
6475   }
6476 
6477   return SDValue();
6478 }
6479 
// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
// with different shifted sources.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  // The target must have at least one rotate/funnel flavor.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  bool HasFSHL = hasOperation(ISD::FSHL, VT);
  bool HasFSHR = hasOperation(ISD::FSHR, VT);
  if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
    return SDValue();

  // Check for truncated rotate.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
    }
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);

  // If neither side matched a rotate half, bail
  if (!LHSShift && !RHSShift)
    return SDValue();

  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract.  We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (ie if InstCombine merged two shl or srl ops into a
  // single one).

  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
  if (LHSShift)
    if (SDValue NewRHSShift =
            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
      RHSShift = NewRHSShift;
  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
  if (RHSShift)
    if (SDValue NewLHSShift =
            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
      LHSShift = NewLHSShift;

  // If a side is still missing, nothing else we can do.
  if (!RHSShift || !LHSShift)
    return SDValue();

  // At this point we've matched or extracted a shift op on each side.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return SDValue(); // Shifts must disagree.

  // If both shifts operate on the same value this is a rotate; otherwise the
  // sources differ and we can only form a funnel shift.
  bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
  if (!IsRotate && !(HasFSHL || HasFSHR))
    return SDValue(); // Requires funnel shift support.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
  // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
  // iff C1+C2 == EltSizeInBits
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Res;
    if (IsRotate && (HasROTL || HasROTR))
      Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
                        HasROTL ? LHSShiftAmt : RHSShiftAmt);
    else
      // No usable rotate (or the sources differ): emit a funnel shift. At
      // least one of FSHL/FSHR is available here due to the checks above.
      Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
                        RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      if (LHSMask.getNode()) {
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
    }

    return Res;
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return SDValue();

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try the variable-amount rotate patterns in both directions.
  if (IsRotate && (HasROTL || HasROTR)) {
    SDValue TryL =
        MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
                          RExtOp0, ISD::ROTL, ISD::ROTR, DL);
    if (TryL)
      return TryL;

    SDValue TryR =
        MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
                          LExtOp0, ISD::ROTR, ISD::ROTL, DL);
    if (TryR)
      return TryR;
  }

  // Fall back to the variable-amount funnel-shift patterns.
  SDValue TryL =
      MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                        LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
  if (TryL)
    return TryL;

  SDValue TryR =
      MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                        RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
  if (TryR)
    return TryR;

  return SDValue();
}
6653 
6654 namespace {
6655 
6656 /// Represents known origin of an individual byte in load combine pattern. The
6657 /// value of the byte is either constant zero or comes from memory.
6658 struct ByteProvider {
6659   // For constant zero providers Load is set to nullptr. For memory providers
6660   // Load represents the node which loads the byte from memory.
6661   // ByteOffset is the offset of the byte in the value produced by the load.
6662   LoadSDNode *Load = nullptr;
6663   unsigned ByteOffset = 0;
6664 
6665   ByteProvider() = default;
6666 
6667   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6668     return ByteProvider(Load, ByteOffset);
6669   }
6670 
6671   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6672 
6673   bool isConstantZero() const { return !Load; }
6674   bool isMemory() const { return Load; }
6675 
6676   bool operator==(const ByteProvider &Other) const {
6677     return Other.Load == Load && Other.ByteOffset == ByteOffset;
6678   }
6679 
6680 private:
6681   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6682       : Load(Load), ByteOffset(ByteOffset) {}
6683 };
6684 
6685 } // end anonymous namespace
6686 
6687 /// Recursively traverses the expression calculating the origin of the requested
6688 /// byte of the given value. Returns None if the provider can't be calculated.
6689 ///
6690 /// For all the values except the root of the expression verifies that the value
6691 /// has exactly one use and if it's not true return None. This way if the origin
6692 /// of the byte is returned it's guaranteed that the values which contribute to
6693 /// the byte are not used outside of this expression.
6694 ///
6695 /// Because the parts of the expression are not allowed to have more than one
6696 /// use this function iterates over trees, not DAGs. So it never visits the same
6697 /// node more than once.
6698 static const Optional<ByteProvider>
6699 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6700                       bool Root = false) {
6701   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
6702   if (Depth == 10)
6703     return None;
6704 
6705   if (!Root && !Op.hasOneUse())
6706     return None;
6707 
6708   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6709   unsigned BitWidth = Op.getValueSizeInBits();
6710   if (BitWidth % 8 != 0)
6711     return None;
6712   unsigned ByteWidth = BitWidth / 8;
6713   assert(Index < ByteWidth && "invalid index requested");
6714   (void) ByteWidth;
6715 
6716   switch (Op.getOpcode()) {
6717   case ISD::OR: {
6718     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6719     if (!LHS)
6720       return None;
6721     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6722     if (!RHS)
6723       return None;
6724 
6725     if (LHS->isConstantZero())
6726       return RHS;
6727     if (RHS->isConstantZero())
6728       return LHS;
6729     return None;
6730   }
6731   case ISD::SHL: {
6732     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6733     if (!ShiftOp)
6734       return None;
6735 
6736     uint64_t BitShift = ShiftOp->getZExtValue();
6737     if (BitShift % 8 != 0)
6738       return None;
6739     uint64_t ByteShift = BitShift / 8;
6740 
6741     return Index < ByteShift
6742                ? ByteProvider::getConstantZero()
6743                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6744                                        Depth + 1);
6745   }
6746   case ISD::ANY_EXTEND:
6747   case ISD::SIGN_EXTEND:
6748   case ISD::ZERO_EXTEND: {
6749     SDValue NarrowOp = Op->getOperand(0);
6750     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6751     if (NarrowBitWidth % 8 != 0)
6752       return None;
6753     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6754 
6755     if (Index >= NarrowByteWidth)
6756       return Op.getOpcode() == ISD::ZERO_EXTEND
6757                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6758                  : None;
6759     return calculateByteProvider(NarrowOp, Index, Depth + 1);
6760   }
6761   case ISD::BSWAP:
6762     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
6763                                  Depth + 1);
6764   case ISD::LOAD: {
6765     auto L = cast<LoadSDNode>(Op.getNode());
6766     if (!L->isSimple() || L->isIndexed())
6767       return None;
6768 
6769     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
6770     if (NarrowBitWidth % 8 != 0)
6771       return None;
6772     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6773 
6774     if (Index >= NarrowByteWidth)
6775       return L->getExtensionType() == ISD::ZEXTLOAD
6776                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6777                  : None;
6778     return ByteProvider::getMemory(L, Index);
6779   }
6780   }
6781 
6782   return None;
6783 }
6784 
/// Offset of byte \p i within a little-endian value of \p BW bytes: the i-th
/// least significant byte is stored at offset i, so this is the identity map.
static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
  (void)BW; // Width does not matter for little endian; kept for symmetry
            // with BigEndianByteAt.
  return i;
}
6788 
/// Offset of byte \p i within a big-endian value of \p BW bytes: the i-th
/// least significant byte lives at the mirrored position from the end.
static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
  return BW - 1 - i;
}
6792 
6793 // Check if the bytes offsets we are looking at match with either big or
6794 // little endian value loaded. Return true for big endian, false for little
6795 // endian, and None if match failed.
6796 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
6797                                   int64_t FirstOffset) {
6798   // The endian can be decided only when it is 2 bytes at least.
6799   unsigned Width = ByteOffsets.size();
6800   if (Width < 2)
6801     return None;
6802 
6803   bool BigEndian = true, LittleEndian = true;
6804   for (unsigned i = 0; i < Width; i++) {
6805     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6806     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6807     BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
6808     if (!BigEndian && !LittleEndian)
6809       return None;
6810   }
6811 
6812   assert((BigEndian != LittleEndian) && "It should be either big endian or"
6813                                         "little endian");
6814   return BigEndian;
6815 }
6816 
6817 static SDValue stripTruncAndExt(SDValue Value) {
6818   switch (Value.getOpcode()) {
6819   case ISD::TRUNCATE:
6820   case ISD::ZERO_EXTEND:
6821   case ISD::SIGN_EXTEND:
6822   case ISD::ANY_EXTEND:
6823     return stripTruncAndExt(Value.getOperand(0));
6824   }
6825   return Value;
6826 }
6827 
/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the targets
/// supports it.
///
/// Assuming little endian target:
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 0) & 0xFF;
///  p[1] = (val >> 8) & 0xFF;
///  p[2] = (val >> 16) & 0xFF;
///  p[3] = (val >> 24) & 0xFF;
/// =>
///  *((i32)p) = val;
///
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 24) & 0xFF;
///  p[1] = (val >> 16) & 0xFF;
///  p[2] = (val >> 8) & 0xFF;
///  p[3] = (val >> 0) & 0xFF;
/// =>
///  *((i32)p) = BSWAP(val);
///
/// \param N the candidate store at the head of the chain being examined.
/// \returns the new wide store on success, or an empty SDValue if the pattern
/// does not match.
SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
  // Collect all the stores in the chain. Walk up the chain as long as each
  // link is a simple, unindexed i8 store.
  SDValue Chain;
  SmallVector<StoreSDNode *, 8> Stores;
  for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
    // TODO: Allow unordered atomics when wider type is legal (see D66309)
    if (Store->getMemoryVT() != MVT::i8 ||
        !Store->isSimple() || Store->isIndexed())
      return SDValue();
    Stores.push_back(Store);
    Chain = Store->getChain();
  }
  // Handle the simple type only.
  unsigned Width = Stores.size();
  EVT VT = EVT::getIntegerVT(
    *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();

  // After legalization, only introduce the wide store if it is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
    return SDValue();

  // Check if all the bytes of the combined value we are looking at are stored
  // to the same base address. Collect bytes offsets from Base address into
  // ByteOffsets.
  SDValue CombinedValue;
  SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX);
  int64_t FirstOffset = INT64_MAX;
  StoreSDNode *FirstStore = nullptr;
  Optional<BaseIndexOffset> Base;
  for (auto Store : Stores) {
    // All the stores store different byte of the CombinedValue. A truncate is
    // required to get that byte value.
    SDValue Trunc = Store->getValue();
    if (Trunc.getOpcode() != ISD::TRUNCATE)
      return SDValue();
    // A shift operation is required to get the right byte offset, except the
    // first byte.
    int64_t Offset = 0;
    SDValue Value = Trunc.getOperand(0);
    if (Value.getOpcode() == ISD::SRL ||
        Value.getOpcode() == ISD::SRA) {
      auto *ShiftOffset = dyn_cast<ConstantSDNode>(Value.getOperand(1));
      // Trying to match the following pattern. The shift offset must be
      // a constant and a multiple of 8. It is the byte offset in "y".
      //
      // x = srl y, offset
      // i8 z = trunc x
      // store z, ...
      if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
        return SDValue();

     Offset = ShiftOffset->getSExtValue()/8;
     Value = Value.getOperand(0);
    }

    // Stores must share the same combined value with different offsets.
    if (!CombinedValue)
      CombinedValue = Value;
    else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
      return SDValue();

    // The trunc and all the extend operation should be stripped to get the
    // real value we are stored.
    else if (CombinedValue.getValueType() != VT) {
      // Prefer the widest value seen so far as the combined value.
      if (Value.getValueType() == VT ||
          Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
        CombinedValue = Value;
      // Give up if the combined value type is smaller than the store size.
      if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
        return SDValue();
    }

    // Stores must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Remember the first byte store
    if (ByteOffsetFromBase < FirstOffset) {
      FirstStore = Store;
      FirstOffset = ByteOffsetFromBase;
    }
    // Map the offset in the store and the offset in the combined value, and
    // early return if it has been set before.
    if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
      return SDValue();
    ByteOffsets[Offset] = ByteOffsetFromBase;
  }

  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
  assert(FirstStore && "First store must be set");

  // Check if the bytes of the combined value we are looking at match with
  // either big or little endian value store.
  Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
  if (!IsBigEndian.hasValue())
    return SDValue();

  // The node we are looking at matches with the pattern, check if we can
  // replace it with a single bswap if needed and store.

  // If the store needs byte swap check if the target supports it
  bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // store and byte shuffling instead of several stores and byte shuffling.
  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Check that a store of the wide type is both allowed and fast on the target
  bool Fast = false;
  bool Allowed =
      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                             *FirstStore->getMemOperand(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  // Truncate the combined value down to the width actually being stored.
  if (VT != CombinedValue.getValueType()) {
    assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
           "Get unexpected store value to combine");
    CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
                             CombinedValue);
  }

  if (NeedsBswap)
    CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);

  SDValue NewStore =
    DAG.getStore(Chain, SDLoc(N),  CombinedValue, FirstStore->getBasePtr(),
                 FirstStore->getPointerInfo(), FirstStore->getAlignment());

  // Rely on other DAG combine rules to remove the other individual stores.
  DAG.ReplaceAllUsesWith(N, NewStore.getNode());
  return NewStore;
}
6990 
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the targets supports it.
///
/// Assuming little endian target:
///  i8 *a = ...
///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
/// =>
///  i32 val = *((i32)a)
///
///  i8 *a = ...
///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
/// =>
///  i32 val = BSWAP(*((i32)a))
///
/// \returns the combined load (possibly wrapped in a shift and/or bswap) on
/// success, or an empty SDValue if the pattern does not match.
///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads) its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node. For example, once the address
/// of t28 load is reassociated load combine won't be triggered:
///             t25: i32 = add t4, Constant:i32<2>
///           t26: i64 = sign_extend t25
///        t27: i64 = add t2, t26
///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
///     t29: i32 = zero_extend t28
///   t32: i32 = shl t29, Constant:i8<8>
/// t33: i32 = or t23, t32
/// As a possible fix visitLoad can check if the load can be a part of a load
/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
  assert(N->getOpcode() == ISD::OR &&
         "Can only match load combining against OR nodes");

  // Handles simple types only
  EVT VT = N->getValueType(0);
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();
  unsigned ByteWidth = VT.getSizeInBits() / 8;

  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
  // Maps a memory byte provider to the byte's offset within its load's
  // in-memory representation, accounting for the target's endianness.
  auto MemoryByteOffset = [&] (ByteProvider P) {
    assert(P.isMemory() && "Must be a memory byte provider");
    unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes not bit");
    unsigned LoadByteWidth = LoadBitWidth / 8;
    return IsBigEndianTarget
            ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
            : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
  };

  Optional<BaseIndexOffset> Base;
  SDValue Chain;

  SmallPtrSet<LoadSDNode *, 8> Loads;
  Optional<ByteProvider> FirstByteProvider;
  int64_t FirstOffset = INT64_MAX;

  // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect bytes offsets from Base address in ByteOffsets.
  SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
  // Number of most-significant bytes known to be constant zero; the
  // replacement load is zero-extended over them.
  unsigned ZeroExtendedBytes = 0;
  // Iterate from the most significant byte down so that zero bytes are only
  // accepted as a contiguous run at the top of the value.
  for (int i = ByteWidth - 1; i >= 0; --i) {
    auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
    if (!P)
      return SDValue();

    if (P->isConstantZero()) {
      // It's OK for the N most significant bytes to be 0, we can just
      // zero-extend the load.
      if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
        return SDValue();
      continue;
    }
    assert(P->isMemory() && "provenance should either be memory or zero");

    LoadSDNode *L = P->Load;
    assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
           !L->isIndexed() &&
           "Must be enforced by calculateByteProvider");
    assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");

    // All loads must share the same chain
    SDValue LChain = L->getChain();
    if (!Chain)
      Chain = LChain;
    else if (Chain != LChain)
      return SDValue();

    // Loads must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Calculate the offset of the current byte from the base address
    ByteOffsetFromBase += MemoryByteOffset(*P);
    ByteOffsets[i] = ByteOffsetFromBase;

    // Remember the first byte load
    if (ByteOffsetFromBase < FirstOffset) {
      FirstByteProvider = P;
      FirstOffset = ByteOffsetFromBase;
    }

    Loads.insert(L);
  }
  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
         "memory, so there must be at least one load which produces the value");
  assert(Base && "Base address of the accessed memory location must be set");
  assert(FirstOffset != INT64_MAX && "First byte offset must be set");

  bool NeedsZext = ZeroExtendedBytes > 0;

  // The memory type is narrower than VT when the top bytes are zero; the
  // difference is covered by a zero-extending load.
  EVT MemVT =
      EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);

  if (!MemVT.isSimple())
    return SDValue();

  // Before legalize we can introduce too wide illegal loads which will be later
  // split into legal sized loads. This enables us to combine i64 load by i8
  // patterns to a couple of i32 loads on 32 bit targets.
  if (LegalOperations &&
      !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
                            MemVT))
    return SDValue();

  // Check if the bytes of the OR we are looking at match with either big or
  // little endian value load
  Optional<bool> IsBigEndian = isBigEndian(
      makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
  if (!IsBigEndian.hasValue())
    return SDValue();

  assert(FirstByteProvider && "must be set");

  // Ensure that the first byte is loaded from zero offset of the first load.
  // So the combined value can be loaded from the first load address.
  if (MemoryByteOffset(*FirstByteProvider) != 0)
    return SDValue();
  LoadSDNode *FirstLoad = FirstByteProvider->Load;

  // The node we are looking at matches with the pattern, check if we can
  // replace it with a single (possibly zero-extended) load and bswap + shift if
  // needed.

  // If the load needs byte swap check if the target supports it
  bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // load and byte shuffling instead of several loads and byte shuffling.
  // We do not introduce illegal bswaps when zero-extending as this tends to
  // introduce too many arithmetic instructions.
  if (NeedsBswap && (LegalOperations || NeedsZext) &&
      !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // If we need to bswap and zero extend, we have to insert a shift. Check that
  // it is legal.
  if (NeedsBswap && NeedsZext && LegalOperations &&
      !TLI.isOperationLegal(ISD::SHL, VT))
    return SDValue();

  // Check that a load of the wide type is both allowed and fast on the target
  bool Fast = false;
  bool Allowed =
      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
                             *FirstLoad->getMemOperand(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
                                   SDLoc(N), VT, Chain, FirstLoad->getBasePtr(),
                                   FirstLoad->getPointerInfo(), MemVT,
                                   FirstLoad->getAlignment());

  // Transfer chain users from old loads to the new load.
  for (LoadSDNode *L : Loads)
    DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));

  if (!NeedsBswap)
    return NewLoad;

  // When both zero-extending and byte-swapping, first shift the loaded bytes
  // into the most-significant positions so the subsequent bswap produces the
  // desired layout.
  SDValue ShiftedLoad =
      NeedsZext
          ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
                        DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
                                                   SDLoc(N), LegalOperations))
          : NewLoad;
  return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
}
7187 
7188 // If the target has andn, bsl, or a similar bit-select instruction,
7189 // we want to unfold masked merge, with canonical pattern of:
7190 //   |        A  |  |B|
7191 //   ((x ^ y) & m) ^ y
7192 //    |  D  |
7193 // Into:
7194 //   (x & m) | (y & ~m)
7195 // If y is a constant, and the 'andn' does not work with immediates,
7196 // we unfold into a different pattern:
7197 //   ~(~x & m) & (m | y)
7198 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7199 //       the very least that breaks andnpd / andnps patterns, and because those
7200 //       patterns are simplified in IR and shouldn't be created in the DAG
7201 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7202   assert(N->getOpcode() == ISD::XOR);
7203 
7204   // Don't touch 'not' (i.e. where y = -1).
7205   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7206     return SDValue();
7207 
7208   EVT VT = N->getValueType(0);
7209 
7210   // There are 3 commutable operators in the pattern,
7211   // so we have to deal with 8 possible variants of the basic pattern.
7212   SDValue X, Y, M;
7213   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7214     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7215       return false;
7216     SDValue Xor = And.getOperand(XorIdx);
7217     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7218       return false;
7219     SDValue Xor0 = Xor.getOperand(0);
7220     SDValue Xor1 = Xor.getOperand(1);
7221     // Don't touch 'not' (i.e. where y = -1).
7222     if (isAllOnesOrAllOnesSplat(Xor1))
7223       return false;
7224     if (Other == Xor0)
7225       std::swap(Xor0, Xor1);
7226     if (Other != Xor1)
7227       return false;
7228     X = Xor0;
7229     Y = Xor1;
7230     M = And.getOperand(XorIdx ? 0 : 1);
7231     return true;
7232   };
7233 
7234   SDValue N0 = N->getOperand(0);
7235   SDValue N1 = N->getOperand(1);
7236   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7237       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7238     return SDValue();
7239 
7240   // Don't do anything if the mask is constant. This should not be reachable.
7241   // InstCombine should have already unfolded this pattern, and DAGCombiner
7242   // probably shouldn't produce it, too.
7243   if (isa<ConstantSDNode>(M.getNode()))
7244     return SDValue();
7245 
7246   // We can transform if the target has AndNot
7247   if (!TLI.hasAndNot(M))
7248     return SDValue();
7249 
7250   SDLoc DL(N);
7251 
7252   // If Y is a constant, check that 'andn' works with immediates.
7253   if (!TLI.hasAndNot(Y)) {
7254     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7255     // If not, we need to do a bit more work to make sure andn is still used.
7256     SDValue NotX = DAG.getNOT(DL, X, VT);
7257     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7258     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7259     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7260     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7261   }
7262 
7263   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7264   SDValue NotM = DAG.getNOT(DL, M, VT);
7265   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7266 
7267   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7268 }
7269 
/// Try the XOR-specific folds on \p N in sequence. Returns the replacement
/// value on success (possibly SDValue(N, 0) when N was updated in place), or
/// an empty SDValue when no fold applies.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  SDLoc DL(N);
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // fold (xor c1, c2) -> c1^c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // Try folding this binop into a select operand.
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  unsigned N0Opcode = N0.getOpcode();
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) &&
      isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               LHS.getValueType());
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0Opcode) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      case ISD::STRICT_FSETCC:
      case ISD::STRICT_FSETCCS: {
        if (N0.hasOneUse()) {
          // FIXME Can we handle multiple uses? Could we token factor the chain
          // results from the new/old setcc?
          SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
                                       N0.getOperand(0),
                                       N0Opcode == ISD::STRICT_FSETCCS);
          CombineTo(N, SetCC);
          DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
          recursivelyDeleteUnusedNodes(N0.getNode());
          return SDValue(N, 0); // Return N so it doesn't get rechecked!
        }
        break;
      }
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL0(N0);
    V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
                    DAG.getConstant(1, DL0, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
    if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
      N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
      AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
      return DAG.getNode(NewOpcode, DL, VT, N00, N01);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
    if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
      N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
      AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
      return DAG.getNode(NewOpcode, DL, VT, N00, N01);
    }
  }

  // fold (not (neg x)) -> (add X, -1)
  // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
  // Y is a constant or the subtract has a single use.
  if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
      isNullConstant(N0.getOperand(0))) {
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
                       DAG.getAllOnesConstant(DL, VT));
  }

  // fold (not (add X, -1)) -> (neg X)
  if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
      isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                       N0.getOperand(0));
  }

  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
    SDValue X = N0.getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
  }

  // Fold xor-of-shift where the xor constant is the shifted all-ones mask.
  if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
    ConstantSDNode *XorC = isConstOrConstSplat(N1);
    ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
    unsigned BitWidth = VT.getScalarSizeInBits();
    if (XorC && ShiftC) {
      // Don't crash on an oversized shift. We can not guarantee that a bogus
      // shift has been simplified to undef.
      uint64_t ShiftAmt = ShiftC->getLimitedValue();
      if (ShiftAmt < BitWidth) {
        APInt Ones = APInt::getAllOnesValue(BitWidth);
        Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
        if (XorC->getAPIntValue() == Ones) {
          // If the xor constant is a shifted -1, do a 'not' before the shift:
          // xor (X << ShiftC), XorC --> (not X) << ShiftC
          // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
          SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
          return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
        }
      }
    }
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
    SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
    if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
      SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
      SDValue S0 = S.getOperand(0);
      if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, DL, VT, S0);
      }
    }
  }

  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0Opcode == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
  if (SDValue MM = unfoldMaskedMerge(N))
    return MM;

  // Simplify the expression using non-local knowledge.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
    return Combined;

  return SDValue();
}
7498 
/// If we have a shift-by-constant of a bitwise logic op that itself has a
/// shift-by-constant operand with identical opcode, we may be able to convert
/// that into 2 independent shifts followed by the logic op. This is a
/// throughput improvement.
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
  // Match a one-use bitwise logic op.
  SDValue LogicOp = Shift->getOperand(0);
  if (!LogicOp.hasOneUse())
    return SDValue();

  unsigned LogicOpcode = LogicOp.getOpcode();
  if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
      LogicOpcode != ISD::XOR)
    return SDValue();

  // Find a matching one-use shift by constant.
  unsigned ShiftOpcode = Shift->getOpcode();
  SDValue C1 = Shift->getOperand(1);
  ConstantSDNode *C1Node = isConstOrConstSplat(C1);
  assert(C1Node && "Expected a shift with constant operand");
  const APInt &C1Val = C1Node->getAPIntValue();
  // Predicate: does V match a one-use inner shift of the same opcode with a
  // constant (or constant-splat) amount? On success, captures the shifted
  // operand in ShiftOp and a pointer to the amount in ShiftAmtVal.
  auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
                             const APInt *&ShiftAmtVal) {
    if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
      return false;

    ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
    if (!ShiftCNode)
      return false;

    // Capture the shifted operand and shift amount value.
    ShiftOp = V.getOperand(0);
    ShiftAmtVal = &ShiftCNode->getAPIntValue();

    // Shift amount types do not have to match their operand type, so check that
    // the constants are the same width.
    if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
      return false;

    // The fold is not valid if the sum of the shift values exceeds bitwidth.
    if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
      return false;

    return true;
  };

  // Logic ops are commutative, so check each operand for a match.
  SDValue X, Y;
  const APInt *C0Val;
  if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
    Y = LogicOp.getOperand(1);
  else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
    Y = LogicOp.getOperand(0);
  else
    return SDValue();

  // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
  // The sum C0+C1 cannot wrap: matchFirstShift verified it stays below the
  // scalar bitwidth.
  SDLoc DL(Shift);
  EVT VT = Shift->getValueType(0);
  EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
  SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
  SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
  SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
  return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
}
7564 
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// We are looking for: (shift being one of shl/sra/srl)
///   shift (binop X, C0), C1
/// And want to transform into:
///   binop (shift X, C1), (shift C0, C1)
SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
  assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");

  // Do not turn a 'not' into a regular xor.
  if (isBitwiseNot(N->getOperand(0)))
    return SDValue();

  // The inner binop must be one-use, since we want to replace it.
  SDValue LHS = N->getOperand(0);
  if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
    return SDValue();

  // TODO: This is limited to early combining because it may reveal regressions
  //       otherwise. But since we just checked a target hook to see if this is
  //       desirable, that should have filtered out cases where this interferes
  //       with some other pattern matching.
  if (!LegalTypes)
    if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
      return R;

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  switch (LHS.getOpcode()) {
  default:
    return SDValue();
  case ISD::OR:
  case ISD::XOR:
  case ISD::AND:
    break;
  case ISD::ADD:
    // add does not commute with right shifts: bits carried across the shift
    // boundary would change the result.
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
  if (!BinOpCst)
    return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is copy/select. Enable this in other cases when figure out it's exactly
  // profitable.
  SDValue BinOpLHSVal = LHS.getOperand(0);
  bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
                            BinOpLHSVal.getOpcode() == ISD::SRA ||
                            BinOpLHSVal.getOpcode() == ISD::SRL) &&
                           isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
  bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
                        BinOpLHSVal.getOpcode() == ISD::SELECT;

  if (!IsShiftByConstant && !IsCopyOrSelect)
    return SDValue();

  // NOTE(review): in the copy/select case this bails when the shift has
  // exactly one use; presumably the transform only pays off when the shift
  // result is reused -- TODO confirm the intent of this condition.
  if (IsCopyOrSelect && N->hasOneUse())
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
                               N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
                                 N->getOperand(1));
  return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
}
7641 
7642 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
7643   assert(N->getOpcode() == ISD::TRUNCATE);
7644   assert(N->getOperand(0).getOpcode() == ISD::AND);
7645 
7646   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
7647   EVT TruncVT = N->getValueType(0);
7648   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
7649       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
7650     SDValue N01 = N->getOperand(0).getOperand(1);
7651     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
7652       SDLoc DL(N);
7653       SDValue N00 = N->getOperand(0).getOperand(0);
7654       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
7655       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
7656       AddToWorklist(Trunc00.getNode());
7657       AddToWorklist(Trunc01.getNode());
7658       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
7659     }
7660   }
7661 
7662   return SDValue();
7663 }
7664 
SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullOrNullSplat(N1))
    return N0;

  // fold (rot x, c) -> x iff (c % BitSize) == 0
  if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
    // For a power-of-2 bitsize, (c % Bitsize) == 0 iff the low log2(Bitsize)
    // bits of the rotate amount are all known zero.
    APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
    if (DAG.MaskedValueIsZero(N1, ModuloMask))
      return N0;
  }

  // fold (rot x, c) -> (rot x, c % BitSize)
  // Only rewrite if at least one constant element is out of range; otherwise
  // the UREM would be a no-op and we would churn the DAG.
  bool OutOfRange = false;
  auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
    OutOfRange |= C->getAPIntValue().uge(Bitsize);
    return true;
  };
  if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
    EVT AmtVT = N1.getValueType();
    SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
    if (SDValue Amt =
            DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
  }

  // rot i16 X, 8 --> bswap X
  // A half-width rotate of an i16 swaps the two bytes regardless of direction.
  auto *RotAmtC = isConstOrConstSplat(N1);
  if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
      VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
    return DAG.getNode(ISD::BSWAP, dl, VT, N0);

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  // Same-direction rotates add their amounts; opposite directions subtract.
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
      EVT ShiftVT = C1->getValueType(0);
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
              CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
        // NOTE(review): this assumes the SREM constant-fold always succeeds
        // given two constant operands; a null result passed to getNode below
        // would be invalid -- TODO confirm.
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
            ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                           CombinedShiftNorm);
      }
    }
  }
  return SDValue();
}
7735 
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  EVT ShiftVT = N1.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    // Shifting the all-ones lanes of the setcc is a no-op on the value bits,
    // so only the mask constant needs to be shifted.
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C =
                  DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
    return C;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    // If the sum of the shift amounts reaches the bitwidth, all bits are
    // shifted out and the result is zero. The amounts are zero-extended with
    // an extra bit so the addition itself cannot overflow.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::ANY_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    SDValue InnerShiftAmt = N0Op0.getOperand(1);
    EVT InnerVT = N0Op0.getValueType();
    uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();

    // c2 >= (wide width - narrow width) guarantees the ext bits are all
    // shifted out; with that, if c1 + c2 reaches the wide bitwidth the
    // result is known zero.
    auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
                                                         ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return c2.uge(OpSizeInBits - InnerBitwidth) &&
             (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
                                                      ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return c2.uge(OpSizeInBits - InnerBitwidth) &&
             (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDLoc DL(N);
      SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
      SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
      Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
      return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    SDValue InnerShiftAmt = N0Op0.getOperand(1);

    auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2);
      return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDLoc DL(N);
      EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
      SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
      NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
      AddToWorklist(NewSHL.getNode());
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
  // The 'exact' flag promises no non-zero bits were shifted out by the right
  // shift, so the pair collapses to a single shift by the difference.
  // TODO - support non-uniform vector shift amounts.
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, ShiftVT));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, ShiftVT));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  // TODO - drop hasOneUse requirement if c1 == c2?
  // TODO - support non-uniform vector shift amounts.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
      TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
        uint64_t c1 = N0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        // The mask keeps exactly the bits that survive both shifts.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, ShiftVT));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, ShiftVT));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  // Matching shift amounts mean only the low c1 bits are cleared, which a
  // mask of (-1 << c1) expresses without any shifts of x.
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
      TLI.isDesirableToCommuteWithShift(N, Level)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  // Only do this when the shifted constant actually folds, so no extra mul
  // operand is created.
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  // Common shift-by-constant transforms (commute binops through the shift).
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N))
      return NewSHL;

  // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
  if (N0.getOpcode() == ISD::VSCALE)
    if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
      auto DL = SDLoc(N);
      APInt C0 = N0.getConstantOperandAPInt(0);
      APInt C1 = NC1->getAPIntValue();
      return DAG.getVScale(DL, VT, C0 << C1);
    }

  return SDValue();
}
7991 
// Transform a right shift of a multiply into a multiply-high.
// Examples:
// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
                                  const TargetLowering &TLI) {
  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");

  // Check the shift amount. Proceed with the transformation if the shift
  // amount is constant.
  ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
  if (!ShiftAmtSrc)
    return SDValue();

  SDLoc DL(N);

  // The operation feeding into the shift must be a multiply.
  SDValue ShiftOperand = N->getOperand(0);
  if (ShiftOperand.getOpcode() != ISD::MUL)
    return SDValue();

  // Both operands must be equivalent extend nodes.
  SDValue LeftOp = ShiftOperand.getOperand(0);
  SDValue RightOp = ShiftOperand.getOperand(1);
  bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
  bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;

  if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
    return SDValue();

  EVT WideVT1 = LeftOp.getValueType();
  EVT WideVT2 = RightOp.getValueType();
  (void)WideVT2;
  // Proceed with the transformation if the wide types match.
  assert((WideVT1 == WideVT2) &&
         "Cannot have a multiply node with two different operand types.");

  EVT NarrowVT = LeftOp.getOperand(0).getValueType();
  // Check that the two extend nodes are the same type.
  if (NarrowVT !=  RightOp.getOperand(0).getValueType())
    return SDValue();

  // Only transform into mulh if mulh for the narrow type is cheaper than
  // a multiply followed by a shift. This should also check if mulh is
  // legal for NarrowVT on the target.
  if (!TLI.isMulhCheaperThanMulShift(NarrowVT))
      return SDValue();

  // Proceed with the transformation if the wide type is twice as large
  // as the narrow type.
  unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
  if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
    return SDValue();

  // Check the shift amount with the narrow type size.
  // Proceed with the transformation if the shift amount is the width
  // of the narrow type.
  unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
  if (ShiftAmt != NarrowVTSize)
    return SDValue();

  // If the operation feeding into the MUL is a sign extend (sext),
  // we use mulhs. Otherwise, zero extends (zext) use mulhu.
  unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;

  // Build the narrow mulh and widen the result back to the original type,
  // extending with the same signedness as the shift (sra -> sext, srl -> zext).
  SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
                               RightOp.getOperand(0));
  return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
                                     : DAG.getZExtOrTrunc(Result, DL, WideVT1));
}
8063 
8064 SDValue DAGCombiner::visitSRA(SDNode *N) {
8065   SDValue N0 = N->getOperand(0);
8066   SDValue N1 = N->getOperand(1);
8067   if (SDValue V = DAG.simplifyShift(N0, N1))
8068     return V;
8069 
8070   EVT VT = N0.getValueType();
8071   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8072 
8073   // Arithmetic shifting an all-sign-bit value is a no-op.
8074   // fold (sra 0, x) -> 0
8075   // fold (sra -1, x) -> -1
8076   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8077     return N0;
8078 
8079   // fold vector ops
8080   if (VT.isVector())
8081     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8082       return FoldedVOp;
8083 
8084   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8085 
8086   // fold (sra c1, c2) -> (sra c1, c2)
8087   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8088     return C;
8089 
8090   if (SDValue NewSel = foldBinOpIntoSelect(N))
8091     return NewSel;
8092 
8093   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
8094   // sext_inreg.
8095   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8096     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8097     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8098     if (VT.isVector())
8099       ExtVT = EVT::getVectorVT(*DAG.getContext(),
8100                                ExtVT, VT.getVectorNumElements());
8101     if (!LegalOperations ||
8102         TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8103         TargetLowering::Legal)
8104       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8105                          N0.getOperand(0), DAG.getValueType(ExtVT));
8106   }
8107 
8108   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8109   // clamp (add c1, c2) to max shift.
8110   if (N0.getOpcode() == ISD::SRA) {
8111     SDLoc DL(N);
8112     EVT ShiftVT = N1.getValueType();
8113     EVT ShiftSVT = ShiftVT.getScalarType();
8114     SmallVector<SDValue, 16> ShiftValues;
8115 
8116     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8117       APInt c1 = LHS->getAPIntValue();
8118       APInt c2 = RHS->getAPIntValue();
8119       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8120       APInt Sum = c1 + c2;
8121       unsigned ShiftSum =
8122           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8123       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8124       return true;
8125     };
8126     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8127       SDValue ShiftValue;
8128       if (VT.isVector())
8129         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8130       else
8131         ShiftValue = ShiftValues[0];
8132       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8133     }
8134   }
8135 
8136   // fold (sra (shl X, m), (sub result_size, n))
8137   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
8138   // result_size - n != m.
8139   // If truncate is free for the target sext(shl) is likely to result in better
8140   // code.
8141   if (N0.getOpcode() == ISD::SHL && N1C) {
8142     // Get the two constanst of the shifts, CN0 = m, CN = n.
8143     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8144     if (N01C) {
8145       LLVMContext &Ctx = *DAG.getContext();
8146       // Determine what the truncate's result bitsize and type would be.
8147       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8148 
8149       if (VT.isVector())
8150         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
8151 
8152       // Determine the residual right-shift amount.
8153       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8154 
8155       // If the shift is not a no-op (in which case this should be just a sign
8156       // extend already), the truncated to type is legal, sign_extend is legal
8157       // on that type, and the truncate to that type is both legal and free,
8158       // perform the transform.
8159       if ((ShiftAmt > 0) &&
8160           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8161           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8162           TLI.isTruncateFree(VT, TruncVT)) {
8163         SDLoc DL(N);
8164         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8165             getShiftAmountTy(N0.getOperand(0).getValueType()));
8166         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8167                                     N0.getOperand(0), Amt);
8168         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8169                                     Shift);
8170         return DAG.getNode(ISD::SIGN_EXTEND, DL,
8171                            N->getValueType(0), Trunc);
8172       }
8173     }
8174   }
8175 
8176   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8177   //   sra (add (shl X, N1C), AddC), N1C -->
8178   //   sext (add (trunc X to (width - N1C)), AddC')
8179   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8180       N0.getOperand(0).getOpcode() == ISD::SHL &&
8181       N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8182     if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8183       SDValue Shl = N0.getOperand(0);
8184       // Determine what the truncate's type would be and ask the target if that
8185       // is a free operation.
8186       LLVMContext &Ctx = *DAG.getContext();
8187       unsigned ShiftAmt = N1C->getZExtValue();
8188       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8189       if (VT.isVector())
8190         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
8191 
8192       // TODO: The simple type check probably belongs in the default hook
8193       //       implementation and/or target-specific overrides (because
8194       //       non-simple types likely require masking when legalized), but that
8195       //       restriction may conflict with other transforms.
8196       if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8197           TLI.isTruncateFree(VT, TruncVT)) {
8198         SDLoc DL(N);
8199         SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8200         SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8201                              trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8202         SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8203         return DAG.getSExtOrTrunc(Add, DL, VT);
8204       }
8205     }
8206   }
8207 
8208   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8209   if (N1.getOpcode() == ISD::TRUNCATE &&
8210       N1.getOperand(0).getOpcode() == ISD::AND) {
8211     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8212       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8213   }
8214 
8215   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8216   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8217   //      if c1 is equal to the number of bits the trunc removes
8218   // TODO - support non-uniform vector shift amounts.
8219   if (N0.getOpcode() == ISD::TRUNCATE &&
8220       (N0.getOperand(0).getOpcode() == ISD::SRL ||
8221        N0.getOperand(0).getOpcode() == ISD::SRA) &&
8222       N0.getOperand(0).hasOneUse() &&
8223       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8224     SDValue N0Op0 = N0.getOperand(0);
8225     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8226       EVT LargeVT = N0Op0.getValueType();
8227       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8228       if (LargeShift->getAPIntValue() == TruncBits) {
8229         SDLoc DL(N);
8230         SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8231                                       getShiftAmountTy(LargeVT));
8232         SDValue SRA =
8233             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8234         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8235       }
8236     }
8237   }
8238 
8239   // Simplify, based on bits shifted out of the LHS.
8240   if (SimplifyDemandedBits(SDValue(N, 0)))
8241     return SDValue(N, 0);
8242 
8243   // If the sign bit is known to be zero, switch this to a SRL.
8244   if (DAG.SignBitIsZero(N0))
8245     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8246 
8247   if (N1C && !N1C->isOpaque())
8248     if (SDValue NewSRA = visitShiftByConstant(N))
8249       return NewSRA;
8250 
8251   // Try to transform this shift into a multiply-high if
8252   // it matches the appropriate pattern detected in combineShiftToMULH.
8253   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8254     return MULH;
8255 
8256   return SDValue();
8257 }
8258 
/// Try to simplify/combine a logical shift right (ISD::SRL) node.
/// Returns the replacement value, SDValue(N, 0) when N was updated in place
/// by demanded-bits simplification, or an empty SDValue if no combine fired.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Generic shift identities (shift of zero, shift by zero, undef cases).
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // Uniform (scalar or splat) constant shift amount, if any.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
    return C;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      // Widen both amounts by one bit so the addition below cannot wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue InnerShift = N0.getOperand(0);
    // TODO - support non-uniform vector shift amounts.
    if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = InnerShift.getValueType();
      EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N);
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
        SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       InnerShift.getOperand(0), NewShiftAmt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
      }
      // In the more general case, we can clear the high bits after the shift:
      // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
      if (N0.hasOneUse() && InnerShift.hasOneUse() &&
          c1 + c2 < InnerShiftSize) {
        SDLoc DL(N);
        SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
        SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       InnerShift.getOperand(0), NewShiftAmt);
        // Keep only the low OpSizeInBits - c2 bits: the rest would have been
        // discarded by the truncate in the original sequence.
        SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
                                                            OpSizeInBits - c2),
                                       DL, InnerShiftVT);
        SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  // TODO - (srl (shl x, c1), c2).
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    // shl+srl by the same amount only clears the top bits; build the
    // equivalent mask as (all-ones >>u c).
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  // TODO - support non-uniform vector shift amounts.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getAPIntValue().uge(BitSize))
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, DL0,
                                          getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // The bits produced by the any_extend are undef, so mask them to zero
      // to match the semantics of shifting the wide value.
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        // Move the single unknown bit down to bit 0 so the XOR below flips it.
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look pass the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  // Try to transform this shift into a multiply-high if
  // it matches the appropriate pattern detected in combineShiftToMULH.
  if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
    return MULH;

  return SDValue();
}
8488 
/// Try to simplify a funnel shift (ISD::FSHL / ISD::FSHR) node: constant
/// amount canonicalization, degenerate-operand folds, rotate formation, and
/// merging consecutive loads shifted together.
SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  bool IsFSHL = N->getOpcode() == ISD::FSHL;
  unsigned BitWidth = VT.getScalarSizeInBits();

  // fold (fshl N0, N1, 0) -> N0
  // fold (fshr N0, N1, 0) -> N1
  // For a power-of-2 width, "amount % BitWidth == 0" is the same as all of
  // the low log2(BitWidth) bits of the amount being known zero.
  if (isPowerOf2_32(BitWidth))
    if (DAG.MaskedValueIsZero(
            N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
      return IsFSHL ? N0 : N1;

  auto IsUndefOrZero = [](SDValue V) {
    return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
  };

  // TODO - support non-uniform vector shift amounts.
  if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
    EVT ShAmtTy = N2.getValueType();

    // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
    if (Cst->getAPIntValue().uge(BitWidth)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
      return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
                         DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
    }

    // Below here the amount is known to be in [0, BitWidth).
    unsigned ShAmt = Cst->getZExtValue();
    if (ShAmt == 0)
      return IsFSHL ? N0 : N1;

    // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
    // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
    // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
    // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
    if (IsUndefOrZero(N0))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
                         DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
                                         SDLoc(N), ShAmtTy));
    if (IsUndefOrZero(N1))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                         DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
                                         SDLoc(N), ShAmtTy));

    // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
    // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
    // TODO - bigendian support once we have test coverage.
    // TODO - can we merge this with CombineConseutiveLoads/MatchLoadCombine?
    // TODO - permit LHS EXTLOAD if extensions are shifted out.
    if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
        !DAG.getDataLayout().isBigEndian()) {
      auto *LHS = dyn_cast<LoadSDNode>(N0);
      auto *RHS = dyn_cast<LoadSDNode>(N1);
      if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
          LHS->getAddressSpace() == RHS->getAddressSpace() &&
          (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
          ISD::isNON_EXTLoad(LHS)) {
        if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
          SDLoc DL(RHS);
          // Byte offset of the funnel-shift result within the combined
          // 2*BitWidth-bit memory image (little-endian only, checked above).
          uint64_t PtrOff =
              IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
          Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
          bool Fast = false;
          if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                                     RHS->getAddressSpace(), NewAlign,
                                     RHS->getMemOperand()->getFlags(), &Fast) &&
              Fast) {
            SDValue NewPtr =
                DAG.getMemBasePlusOffset(RHS->getBasePtr(), PtrOff, DL);
            AddToWorklist(NewPtr.getNode());
            SDValue Load = DAG.getLoad(
                VT, DL, RHS->getChain(), NewPtr,
                RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
            // Replace the old load's chain with the new load's chain.
            WorklistRemover DeadNodes(*this);
            DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
            return Load;
          }
        }
      }
    }
  }

  // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
  // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff We know the shift amount is in range.
  // TODO: when is it worth doing SUB(BW, N2) as well?
  if (isPowerOf2_32(BitWidth)) {
    APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
    if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
    if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
  }

  // fold (fshl N0, N0, N2) -> (rotl N0, N2)
  // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
  // is legal as well we might be better off avoiding non-constant (BW - N2).
  unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
  if (N0 == N1 && hasOperation(RotOpc, VT))
    return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);

  // Simplify, based on bits shifted out of N0/N1.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
8602 
8603 SDValue DAGCombiner::visitABS(SDNode *N) {
8604   SDValue N0 = N->getOperand(0);
8605   EVT VT = N->getValueType(0);
8606 
8607   // fold (abs c1) -> c2
8608   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8609     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
8610   // fold (abs (abs x)) -> (abs x)
8611   if (N0.getOpcode() == ISD::ABS)
8612     return N0;
8613   // fold (abs x) -> x iff not-negative
8614   if (DAG.SignBitIsZero(N0))
8615     return N0;
8616   return SDValue();
8617 }
8618 
8619 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
8620   SDValue N0 = N->getOperand(0);
8621   EVT VT = N->getValueType(0);
8622 
8623   // fold (bswap c1) -> c2
8624   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8625     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
8626   // fold (bswap (bswap x)) -> x
8627   if (N0.getOpcode() == ISD::BSWAP)
8628     return N0->getOperand(0);
8629   return SDValue();
8630 }
8631 
8632 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
8633   SDValue N0 = N->getOperand(0);
8634   EVT VT = N->getValueType(0);
8635 
8636   // fold (bitreverse c1) -> c2
8637   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8638     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
8639   // fold (bitreverse (bitreverse x)) -> x
8640   if (N0.getOpcode() == ISD::BITREVERSE)
8641     return N0.getOperand(0);
8642   return SDValue();
8643 }
8644 
8645 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
8646   SDValue N0 = N->getOperand(0);
8647   EVT VT = N->getValueType(0);
8648 
8649   // fold (ctlz c1) -> c2
8650   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8651     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
8652 
8653   // If the value is known never to be zero, switch to the undef version.
8654   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
8655     if (DAG.isKnownNeverZero(N0))
8656       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8657   }
8658 
8659   return SDValue();
8660 }
8661 
8662 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
8663   SDValue N0 = N->getOperand(0);
8664   EVT VT = N->getValueType(0);
8665 
8666   // fold (ctlz_zero_undef c1) -> c2
8667   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8668     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8669   return SDValue();
8670 }
8671 
8672 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
8673   SDValue N0 = N->getOperand(0);
8674   EVT VT = N->getValueType(0);
8675 
8676   // fold (cttz c1) -> c2
8677   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8678     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
8679 
8680   // If the value is known never to be zero, switch to the undef version.
8681   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
8682     if (DAG.isKnownNeverZero(N0))
8683       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8684   }
8685 
8686   return SDValue();
8687 }
8688 
8689 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
8690   SDValue N0 = N->getOperand(0);
8691   EVT VT = N->getValueType(0);
8692 
8693   // fold (cttz_zero_undef c1) -> c2
8694   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8695     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8696   return SDValue();
8697 }
8698 
8699 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
8700   SDValue N0 = N->getOperand(0);
8701   EVT VT = N->getValueType(0);
8702 
8703   // fold (ctpop c1) -> c2
8704   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8705     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
8706   return SDValue();
8707 }
8708 
8709 // FIXME: This should be checking for no signed zeros on individual operands, as
8710 // well as no nans.
8711 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
8712                                          SDValue RHS,
8713                                          const TargetLowering &TLI) {
8714   const TargetOptions &Options = DAG.getTarget().Options;
8715   EVT VT = LHS.getValueType();
8716 
8717   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
8718          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
8719          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
8720 }
8721 
/// Generate Min/Max node
/// Match select(setcc(LHS, RHS, CC), True, False) against a floating-point
/// min/max pattern and emit FMINNUM(_IEEE)/FMAXNUM(_IEEE) when the target
/// supports it. Callers must already have established NaN/legality safety
/// (see isLegalToCombineMinNumMaxNum). Returns an empty SDValue on no match.
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                   SDValue RHS, SDValue True, SDValue False,
                                   ISD::CondCode CC, const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  // The select must choose between exactly the compared values (in either
  // order) for this to be a min/max.
  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
    return SDValue();

  // Legality of the legacy FMINNUM/FMAXNUM opcodes is queried on the type
  // the value will be transformed to during legalization.
  EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  switch (CC) {
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE: {
    // Since it's known never nan to get here already, either fminnum or
    // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
    // expanded in terms of it.
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE: {
    // Greater-than comparisons mirror the case above with min/max swapped.
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  default:
    // Equality and unordered-only predicates are not min/max patterns.
    return SDValue();
  }
}
8769 
8770 /// If a (v)select has a condition value that is a sign-bit test, try to smear
8771 /// the condition operand sign-bit across the value width and use it as a mask.
8772 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
8773   SDValue Cond = N->getOperand(0);
8774   SDValue C1 = N->getOperand(1);
8775   SDValue C2 = N->getOperand(2);
8776   assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
8777          "Expected select-of-constants");
8778 
8779   EVT VT = N->getValueType(0);
8780   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
8781       VT != Cond.getOperand(0).getValueType())
8782     return SDValue();
8783 
8784   // The inverted-condition + commuted-select variants of these patterns are
8785   // canonicalized to these forms in IR.
8786   SDValue X = Cond.getOperand(0);
8787   SDValue CondC = Cond.getOperand(1);
8788   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
8789   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
8790       isAllOnesOrAllOnesSplat(C2)) {
8791     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
8792     SDLoc DL(N);
8793     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8794     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8795     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
8796   }
8797   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
8798     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
8799     SDLoc DL(N);
8800     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8801     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8802     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
8803   }
8804   return SDValue();
8805 }
8806 
/// Fold a select whose true/false operands are both integer constants into
/// cheaper math or logic (zext/sext/not of the condition, add, shift).
/// Returns an empty SDValue when no fold applies.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  if (!VT.isInteger())
    return SDValue();

  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C1 || !C2)
    return SDValue();

  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext). There
  // is also a target-independent combine here in DAGCombiner in the other
  // direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // Use a target hook because some targets may prefer to transform in the
    // other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      // For any constants that differ by 1, we can transform the select into an
      // extend and add.
      const APInt &C1Val = C1->getAPIntValue();
      const APInt &C2Val = C2->getAPIntValue();
      if (C1Val - 1 == C2Val) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1Val + 1 == C2Val) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        // (sext i1 true == -1, so -1 + C1+1 == C1.)
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }

      // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
      if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
        return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
      }

      // Fall back to the sign-bit-smear folds.
      if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
        return V;
    }

    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}
8914 
/// Combine (select Cond, N1, N2).
///
/// Folds are tried in a deliberate order: generic DAG simplification first,
/// then i1 logic folds (or/and/not forms), select-of-constants,
/// select-of-select normalization (per target preference), condition
/// flipping, and finally setcc-based folds (min/max, unsigned saturating
/// add, select_cc formation).
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0); // condition
  SDValue N1 = N->getOperand(1); // true value
  SDValue N2 = N->getOperand(2); // false value
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);
  SDNodeFlags Flags = N->getFlags();

  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or C, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      // Note: getNode may CSE to a pre-existing node; use_empty() below tells
      // us whether the inner select already existed in the DAG.
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2, Flags);
      // Cleanup on failure.
      if (InnerSelect.use_empty())
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
                                        Cond1, N1, N2, Flags);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect, Flags);
      // Cleanup on failure.
      if (InnerSelect.use_empty())
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
                             N2, Flags);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2, Flags);
        }
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
                             N2_2, Flags);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2, Flags);
      }
    }
  }

  // select (not Cond), N1, N2 -> select Cond, N2, N1
  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
    SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
    SelectOp->setFlags(Flags);
    return SelectOp;
  }

  // Fold selects based on a setcc into other things, such as min/max/abs.
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

    // select (fcmp lt x, y), x, y -> fminnum x, y
    // select (fcmp gt x, y), x, y -> fmaxnum x, y
    //
    // This is OK if we don't care what happens if either operand is a NaN.
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
      if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
                                                CC, TLI, DAG))
        return FMinMax;

    // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
    // This is conservatively limited to pre-legal-operations to give targets
    // a chance to reverse the transform if they want to do that. Also, it is
    // unlikely that the pattern would be formed late, so it's probably not
    // worth going through the other checks.
    if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
        CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
        N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
      auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
      auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
      if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
        // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
        // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
        //
        // The IR equivalent of this transform would have this form:
        //   %a = add %x, C
        //   %c = icmp ugt %x, ~C
        //   %r = select %c, -1, %a
        //   =>
        //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
        //   %u0 = extractvalue %u, 0
        //   %u1 = extractvalue %u, 1
        //   %r = select %u1, -1, %u0
        SDVTList VTs = DAG.getVTList(VT, VT0);
        SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
        return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
      }
    }

    if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
        (!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
      // Any flags available in a select/setcc fold will be on the setcc as they
      // migrated from fcmp
      Flags = N0.getNode()->getFlags();
      SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
                                       N2, N0.getOperand(2));
      SelectNode->setFlags(Flags);
      return SelectNode;
    }

    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}
9101 
// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
//
// If each half of the condition build_vector is uniform (ignoring undefs),
// the vselect picks a whole concat operand per half, so the result can be
// re-expressed as a concat of the chosen sub-vectors.
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
  // binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();

  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF
  // After we find an UNDEF element, keep looping until we get to half the
  // length of the BV and see if all the non-undef nodes are the same.
  ConstantSDNode *BottomHalf = nullptr;
  for (int i = 0; i < NumElems / 2; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (BottomHalf == nullptr)
      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    // Uniformity is checked by node identity, not by constant value.
    else if (Cond->getOperand(i).getNode() != BottomHalf)
      return SDValue();
  }

  // Do the same for the second half of the BuildVector
  ConstantSDNode *TopHalf = nullptr;
  for (int i = NumElems / 2; i < NumElems; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (TopHalf == nullptr)
      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != TopHalf)
      return SDValue();
  }

  assert(TopHalf && BottomHalf &&
         "One half of the selector was all UNDEFs and the other was all the "
         "same value. This should have been addressed before this function.");
  // A zero (false) selector picks the RHS concat operand, non-zero picks LHS.
  return DAG.getNode(
      ISD::CONCAT_VECTORS, DL, VT,
      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
9156 
9157 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
9158   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
9159   SDValue Mask = MSC->getMask();
9160   SDValue Chain = MSC->getChain();
9161   SDLoc DL(N);
9162 
9163   // Zap scatters with a zero mask.
9164   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9165     return Chain;
9166 
9167   return SDValue();
9168 }
9169 
9170 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
9171   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
9172   SDValue Mask = MST->getMask();
9173   SDValue Chain = MST->getChain();
9174   SDLoc DL(N);
9175 
9176   // Zap masked stores with a zero mask.
9177   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9178     return Chain;
9179 
9180   // Try transforming N to an indexed store.
9181   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9182     return SDValue(N, 0);
9183 
9184   return SDValue();
9185 }
9186 
9187 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
9188   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
9189   SDValue Mask = MGT->getMask();
9190   SDLoc DL(N);
9191 
9192   // Zap gathers with a zero mask.
9193   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9194     return CombineTo(N, MGT->getPassThru(), MGT->getChain());
9195 
9196   return SDValue();
9197 }
9198 
9199 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
9200   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
9201   SDValue Mask = MLD->getMask();
9202   SDLoc DL(N);
9203 
9204   // Zap masked loads with a zero mask.
9205   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9206     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
9207 
9208   // Try transforming N to an indexed load.
9209   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9210     return SDValue(N, 0);
9211 
9212   return SDValue();
9213 }
9214 
/// A vector select of 2 constant vectors can be simplified to math/logic to
/// avoid a variable select instruction and possibly avoid constant loads.
///
/// Requires a one-use i1-element condition and constant build_vector arms;
/// target opt-in is via convertSelectOfConstantsToMath().
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
      !TLI.convertSelectOfConstantsToMath(VT) ||
      !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
      !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
    return SDValue();

  // Check if we can use the condition value to increment/decrement a single
  // constant value. This simplifies a select to an add and removes a constant
  // load/materialization from the general case.
  bool AllAddOne = true;
  bool AllSubOne = true;
  unsigned Elts = VT.getVectorNumElements();
  for (unsigned i = 0; i != Elts; ++i) {
    SDValue N1Elt = N1.getOperand(i);
    SDValue N2Elt = N2.getOperand(i);
    // Undef or type-mismatched element pairs neither confirm nor refute the
    // +1/-1 relationship; they are skipped.
    if (N1Elt.isUndef() || N2Elt.isUndef())
      continue;
    if (N1Elt.getValueType() != N2Elt.getValueType())
      continue;

    const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
    const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
    if (C1 != C2 + 1)
      AllAddOne = false;
    if (C1 != C2 - 1)
      AllSubOne = false;
  }

  // Further simplifications for the extra-special cases where the constants are
  // all 0 or all -1 should be implemented as folds of these patterns.
  SDLoc DL(N);
  if (AllAddOne || AllSubOne) {
    // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
    // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
    // If both relations hold (e.g. all pairs were skipped above), the
    // zero-extend form is chosen.
    auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
    SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
    return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
  }

  // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
  APInt Pow2C;
  if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
      isNullOrNullSplat(N2)) {
    SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
    SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
    return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
  }

  if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
    return V;

  // The general case for select-of-constants:
  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
  // leave that to a machine-specific pass.
  return SDValue();
}
9279 
/// Combine (vselect Cond, N1, N2).
///
/// Handles, in order: generic simplification, condition flipping, integer
/// abs recognition, fp min/max formation, compare widening via extending
/// loads, true/false-operand folds, constant-mask folds, concat_vectors
/// conversion, and select-of-constant-vectors math.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0); // condition vector
  SDValue N1 = N->getOperand(1); // true values
  SDValue N2 = N->getOperand(2); // false values
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
    return DAG.getSelect(DL, VT, F, N2, N1);

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      // Prefer a native ABS node; otherwise emit the sra/add/xor expansion.
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
                                  DAG.getConstant(VT.getScalarSizeInBits() - 1,
                                                  DL, getShiftAmountTy(VT)));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }

    // vselect x, y (fcmp lt x, y) -> fminnum x, y
    // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
      if (SDValue FMinMax =
              combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
        return FMinMax;
    }

    // If this select has a condition (setcc) with narrower operands than the
    // select, try to widen the compare to match the select width.
    // TODO: This should be extended to handle any constant.
    // TODO: This could be extended to handle non-loading patterns, but that
    //       requires thorough testing to avoid regressions.
    if (isNullOrNullSplat(RHS)) {
      EVT NarrowVT = LHS.getValueType();
      EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
      EVT SetCCVT = getSetCCResultType(LHS.getValueType());
      unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
      unsigned WideWidth = WideVT.getScalarSizeInBits();
      bool IsSigned = isSignedIntSetCC(CC);
      auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
          SetCCWidth != 1 && SetCCWidth < WideWidth &&
          TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
          TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
        // Both compare operands can be widened for free. The LHS can use an
        // extended load, and the RHS is a constant:
        //   vselect (ext (setcc load(X), C)), N1, N2 -->
        //   vselect (setcc extload(X), C'), N1, N2
        auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
        SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
        EVT WideSetCCVT = getSetCCResultType(WideVT);
        SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
        return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
      }
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  return SDValue();
}
9394 
9395 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
9396   SDValue N0 = N->getOperand(0);
9397   SDValue N1 = N->getOperand(1);
9398   SDValue N2 = N->getOperand(2);
9399   SDValue N3 = N->getOperand(3);
9400   SDValue N4 = N->getOperand(4);
9401   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
9402 
9403   // fold select_cc lhs, rhs, x, x, cc -> x
9404   if (N2 == N3)
9405     return N2;
9406 
9407   // Determine if the condition we're dealing with is constant
9408   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
9409                                   CC, SDLoc(N), false)) {
9410     AddToWorklist(SCC.getNode());
9411 
9412     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
9413       if (!SCCC->isNullValue())
9414         return N2;    // cond always true -> true val
9415       else
9416         return N3;    // cond always false -> false val
9417     } else if (SCC->isUndef()) {
9418       // When the condition is UNDEF, just return the first operand. This is
9419       // coherent the DAG creation, no setcc node is created in this case
9420       return N2;
9421     } else if (SCC.getOpcode() == ISD::SETCC) {
9422       // Fold to a simpler select_cc
9423       SDValue SelectOp = DAG.getNode(
9424           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
9425           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
9426       SelectOp->setFlags(SCC->getFlags());
9427       return SelectOp;
9428     }
9429   }
9430 
9431   // If we can fold this based on the true/false value, do so.
9432   if (SimplifySelectOps(N, N2, N3))
9433     return SDValue(N, 0);  // Don't revisit N.
9434 
9435   // fold select_cc into other things, such as min/max/abs
9436   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
9437 }
9438 
9439 SDValue DAGCombiner::visitSETCC(SDNode *N) {
9440   // setcc is very commonly used as an argument to brcond. This pattern
9441   // also lend itself to numerous combines and, as a result, it is desired
9442   // we keep the argument to a brcond as a setcc as much as possible.
9443   bool PreferSetCC =
9444       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
9445 
9446   SDValue Combined = SimplifySetCC(
9447       N->getValueType(0), N->getOperand(0), N->getOperand(1),
9448       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
9449 
9450   if (!Combined)
9451     return SDValue();
9452 
9453   // If we prefer to have a setcc, and we don't, we'll try our best to
9454   // recreate one using rebuildSetCC.
9455   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
9456     SDValue NewSetCC = rebuildSetCC(Combined);
9457 
9458     // We don't have anything interesting to combine to.
9459     if (NewSetCC.getNode() == N)
9460       return SDValue();
9461 
9462     if (NewSetCC)
9463       return NewSetCC;
9464   }
9465 
9466   return Combined;
9467 }
9468 
9469 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
9470   SDValue LHS = N->getOperand(0);
9471   SDValue RHS = N->getOperand(1);
9472   SDValue Carry = N->getOperand(2);
9473   SDValue Cond = N->getOperand(3);
9474 
9475   // If Carry is false, fold to a regular SETCC.
9476   if (isNullConstant(Carry))
9477     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
9478 
9479   return SDValue();
9480 }
9481 
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
         Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
         Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
         && "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, DL, VT, N0);

  // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
  // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
  // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
  if (N0->getOpcode() == ISD::SELECT) {
    SDValue Op1 = N0->getOperand(1);
    SDValue Op2 = N0->getOperand(2);
    // Skipped for zext when the target can do the zext for free, since the
    // select form is then preferable.
    if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
        (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
      // For any_extend, choose sign extension of the constants to allow a
      // possible further transform to sign_extend_inreg.i.e.
      //
      // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
      // t2: i64 = any_extend t1
      // -->
      // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
      // -->
      // t4: i64 = sign_extend_inreg t3
      unsigned FoldOpc = Opcode;
      if (FoldOpc == ISD::ANY_EXTEND)
        FoldOpc = ISD::SIGN_EXTEND;
      return DAG.getSelect(DL, VT, N0->getOperand(0),
                           DAG.getNode(FoldOpc, DL, VT, Op1),
                           DAG.getNode(FoldOpc, DL, VT, Op2));
    }
  }

  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
  EVT SVT = VT.getScalarType();
  if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    return SDValue();

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
  SmallVector<SDValue, 8> Elts;
  unsigned NumElts = VT.getVectorNumElements();

  // For zero-extensions, UNDEF elements still guarantee to have the upper
  // bits set to zero.
  bool IsZext =
      Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;

  for (unsigned i = 0; i != NumElts; ++i) {
    SDValue Op = N0.getOperand(i);
    if (Op.isUndef()) {
      // Undef elements become 0 for zext (upper bits must be zero) and stay
      // undef otherwise.
      Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
      continue;
    }

    // This DL intentionally shadows the function-level DL so each extended
    // constant keeps the debug location of its source element.
    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getBuildVector(VT, DL, Elts);
}
9570 
9571 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
9572 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
9573 // transformation. Returns true if extension are possible and the above
9574 // mentioned transformation is profitable.
9575 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
9576                                     unsigned ExtOpc,
9577                                     SmallVectorImpl<SDNode *> &ExtendNodes,
9578                                     const TargetLowering &TLI) {
9579   bool HasCopyToRegUses = false;
9580   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
9581   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
9582                             UE = N0.getNode()->use_end();
9583        UI != UE; ++UI) {
9584     SDNode *User = *UI;
9585     if (User == N)
9586       continue;
9587     if (UI.getUse().getResNo() != N0.getResNo())
9588       continue;
9589     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
9590     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
9591       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
9592       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
9593         // Sign bits will be lost after a zext.
9594         return false;
9595       bool Add = false;
9596       for (unsigned i = 0; i != 2; ++i) {
9597         SDValue UseOp = User->getOperand(i);
9598         if (UseOp == N0)
9599           continue;
9600         if (!isa<ConstantSDNode>(UseOp))
9601           return false;
9602         Add = true;
9603       }
9604       if (Add)
9605         ExtendNodes.push_back(User);
9606       continue;
9607     }
9608     // If truncates aren't free and there are users we can't
9609     // extend, it isn't worthwhile.
9610     if (!isTruncFree)
9611       return false;
9612     // Remember if this value is live-out.
9613     if (User->getOpcode() == ISD::CopyToReg)
9614       HasCopyToRegUses = true;
9615   }
9616 
9617   if (HasCopyToRegUses) {
9618     bool BothLiveOut = false;
9619     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
9620          UI != UE; ++UI) {
9621       SDUse &Use = UI.getUse();
9622       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
9623         BothLiveOut = true;
9624         break;
9625       }
9626     }
9627     if (BothLiveOut)
9628       // Both unextended and extended values are live out. There had better be
9629       // a good reason for the transformation.
9630       return ExtendNodes.size();
9631   }
9632   return true;
9633 }
9634 
9635 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
9636                                   SDValue OrigLoad, SDValue ExtLoad,
9637                                   ISD::NodeType ExtType) {
9638   // Extend SetCC uses if necessary.
9639   SDLoc DL(ExtLoad);
9640   for (SDNode *SetCC : SetCCs) {
9641     SmallVector<SDValue, 4> Ops;
9642 
9643     for (unsigned j = 0; j != 2; ++j) {
9644       SDValue SOp = SetCC->getOperand(j);
9645       if (SOp == OrigLoad)
9646         Ops.push_back(ExtLoad);
9647       else
9648         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
9649     }
9650 
9651     Ops.push_back(SetCC->getOperand(2));
9652     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
9653   }
9654 }
9655 
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
/// Split an illegal but splittable extending vector load into several legal
/// extloads and concatenate the results. Returns SDValue(N, 0) on success so
/// the caller does not re-visit N, or an empty SDValue if no change was made.
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Only plain (non-extending), unindexed, non-atomic/non-volatile loads of
  // power-of-2 vectors qualify, and only when the target wants the extload.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || !LN0->isSimple() ||
      !DstVT.isVector() || !DstVT.isPow2VectorType() ||
      !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  // Repeatedly halve both VTs until the extload is legal/custom or we run out
  // of elements to split.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  // Stride is the byte distance between consecutive split loads in memory.
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    // Alignment of each piece is limited by its offset from the base.
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL);

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  // Merge the chains of all split loads and concatenate their values.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  // SETCC users recorded earlier are rewritten to compare the wide value.
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
9754 
// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
/// Returns SDValue(N, 0) on success so the caller does not re-visit N, or an
/// empty SDValue if the pattern does not match or is not profitable.
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
  EVT VT = N->getValueType(0);
  EVT OrigVT = N->getOperand(0).getValueType();
  // If the zext is free anyway, there is nothing to gain from folding it
  // into the load.
  if (TLI.isZExtFree(OrigVT, VT))
    return SDValue();

  // and/or/xor
  SDValue N0 = N->getOperand(0);
  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
        N0.getOpcode() == ISD::XOR) ||
      N0.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    return SDValue();

  // shl/shr
  SDValue N1 = N0->getOperand(0);
  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
      N1.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    return SDValue();

  // load
  if (!isa<LoadSDNode>(N1.getOperand(0)))
    return SDValue();
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
  EVT MemVT = Load->getMemoryVT();
  // The target must support a zextload from the memory type, and the load
  // must not already be a sign-extending or indexed load.
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();


  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong.
  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    return SDValue();

  // The logic op and shift must be consumed only by this pattern so we can
  // delete them after rewriting.
  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  SmallVector<SDNode*, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
                               ISD::ZERO_EXTEND, SetCCs, TLI))
    return SDValue();

  // Actually do the transformation.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
                                   Load->getChain(), Load->getBasePtr(),
                                   Load->getMemoryVT(), Load->getMemOperand());

  SDLoc DL1(N1);
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
                              N1.getOperand(1));

  // Zero-extend the logic op's constant to the wide type.
  APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
  SDLoc DL0(N0);
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
                            DAG.getConstant(Mask, DL0, VT));

  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
  CombineTo(N, And);
  if (SDValue(Load, 0).hasOneUse()) {
    // Only the chain result is still used: just reroute the chain.
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
  } else {
    // The narrow load value has other users; give them a truncate of the
    // new wide load.
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
                                Load->getValueType(0), ExtLoad);
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
  }

  // N0 is dead at this point.
  recursivelyDeleteUnusedNodes(N0.getNode());

  return SDValue(N,0); // Return N so it doesn't get rechecked!
}
9831 
9832 /// If we're narrowing or widening the result of a vector select and the final
9833 /// size is the same size as a setcc (compare) feeding the select, then try to
9834 /// apply the cast operation to the select's operands because matching vector
9835 /// sizes for a select condition and other operands should be more efficient.
9836 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
9837   unsigned CastOpcode = Cast->getOpcode();
9838   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
9839           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
9840           CastOpcode == ISD::FP_ROUND) &&
9841          "Unexpected opcode for vector select narrowing/widening");
9842 
9843   // We only do this transform before legal ops because the pattern may be
9844   // obfuscated by target-specific operations after legalization. Do not create
9845   // an illegal select op, however, because that may be difficult to lower.
9846   EVT VT = Cast->getValueType(0);
9847   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
9848     return SDValue();
9849 
9850   SDValue VSel = Cast->getOperand(0);
9851   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
9852       VSel.getOperand(0).getOpcode() != ISD::SETCC)
9853     return SDValue();
9854 
9855   // Does the setcc have the same vector size as the casted select?
9856   SDValue SetCC = VSel.getOperand(0);
9857   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
9858   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
9859     return SDValue();
9860 
9861   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
9862   SDValue A = VSel.getOperand(1);
9863   SDValue B = VSel.getOperand(2);
9864   SDValue CastA, CastB;
9865   SDLoc DL(Cast);
9866   if (CastOpcode == ISD::FP_ROUND) {
9867     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
9868     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
9869     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
9870   } else {
9871     CastA = DAG.getNode(CastOpcode, DL, VT, A);
9872     CastB = DAG.getNode(CastOpcode, DL, VT, B);
9873   }
9874   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
9875 }
9876 
// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
/// Replace an extend-of-extload with a single wider extload when the target
/// supports it. Returns SDValue(N, 0) on success so the caller does not
/// re-visit N, or an empty SDValue on no change.
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
                                     const TargetLowering &TLI, EVT VT,
                                     bool LegalOperations, SDNode *N,
                                     SDValue N0, ISD::LoadExtType ExtLoadType) {
  SDNode *N0Node = N0.getNode();
  // The existing load's extension kind must match the requested one (an
  // any-extending load is compatible with either).
  bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
                                                   : ISD::isZEXTLoad(N0Node);
  if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
      !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  EVT MemVT = LN0->getMemoryVT();
  // After legalization (or for non-simple/vector loads) the wide extload must
  // be legal; otherwise we may create it speculatively.
  if ((LegalOperations || !LN0->isSimple() ||
       VT.isVector()) &&
      !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
    return SDValue();

  SDValue ExtLoad =
      DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                     LN0->getBasePtr(), MemVT, LN0->getMemOperand());
  Combiner.CombineTo(N, ExtLoad);
  // Reroute the old load's chain users to the new load, then drop the old
  // load if it became dead.
  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
  if (LN0->use_empty())
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
9906 
// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// Only generate vector extloads when 1) they're legal, and 2) they are
// deemed desirable by the target.
/// Returns SDValue(N, 0) on success so the caller does not re-visit N, or an
/// empty SDValue on no change.
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
                                  const TargetLowering &TLI, EVT VT,
                                  bool LegalOperations, SDNode *N, SDValue N0,
                                  ISD::LoadExtType ExtLoadType,
                                  ISD::NodeType ExtOpc) {
  // Only plain, unindexed loads qualify; after legalization (or for vector /
  // non-simple loads) the extload must be supported by the target.
  if (!ISD::isNON_EXTLoad(N0.getNode()) ||
      !ISD::isUNINDEXEDLoad(N0.getNode()) ||
      ((LegalOperations || VT.isVector() ||
        !cast<LoadSDNode>(N0)->isSimple()) &&
       !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
    return {};

  bool DoXform = true;
  SmallVector<SDNode *, 4> SetCCs;
  // With multiple users, only transform when all other uses can be extended
  // too (SETCC users are collected for later rewriting).
  if (!N0.hasOneUse())
    DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
  if (VT.isVector())
    DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
  if (!DoXform)
    return {};

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                                   LN0->getBasePtr(), N0.getValueType(),
                                   LN0->getMemOperand());
  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
  // If the load value is used only by N, replace it via CombineTo N.
  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
  Combiner.CombineTo(N, ExtLoad);
  if (NoReplaceTrunc) {
    // Only the chain result remains live: reroute it and delete the load.
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  } else {
    // Other users of the narrow load get a truncate of the wide load.
    SDValue Trunc =
        DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
    Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
9949 
9950 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
9951                                         const TargetLowering &TLI, EVT VT,
9952                                         SDNode *N, SDValue N0,
9953                                         ISD::LoadExtType ExtLoadType,
9954                                         ISD::NodeType ExtOpc) {
9955   if (!N0.hasOneUse())
9956     return SDValue();
9957 
9958   MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
9959   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
9960     return SDValue();
9961 
9962   if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
9963     return SDValue();
9964 
9965   if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
9966     return SDValue();
9967 
9968   SDLoc dl(Ld);
9969   SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
9970   SDValue NewLoad = DAG.getMaskedLoad(
9971       VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
9972       PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
9973       ExtLoadType, Ld->isExpandingLoad());
9974   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
9975   return NewLoad;
9976 }
9977 
9978 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
9979                                        bool LegalOperations) {
9980   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9981           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
9982 
9983   SDValue SetCC = N->getOperand(0);
9984   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
9985       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
9986     return SDValue();
9987 
9988   SDValue X = SetCC.getOperand(0);
9989   SDValue Ones = SetCC.getOperand(1);
9990   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
9991   EVT VT = N->getValueType(0);
9992   EVT XVT = X.getValueType();
9993   // setge X, C is canonicalized to setgt, so we do not need to match that
9994   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
9995   // not require the 'not' op.
9996   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
9997     // Invert and smear/shift the sign bit:
9998     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
9999     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
10000     SDLoc DL(N);
10001     unsigned ShCt = VT.getSizeInBits() - 1;
10002     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10003     if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
10004       SDValue NotX = DAG.getNOT(DL, X, VT);
10005       SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
10006       auto ShiftOpcode =
10007         N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
10008       return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
10009     }
10010   }
10011   return SDValue();
10012 }
10013 
/// Main combine entry point for ISD::SIGN_EXTEND nodes. Returns the folded
/// value, SDValue(N, 0) when N was rewritten in place, or an empty SDValue
/// when no combine applied.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Fold sext of constants / constant build_vectors.
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getScalarValueSizeInBits();
    unsigned MidBits  = N0.getScalarValueSizeInBits();
    unsigned DestBits = VT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // Try to simplify (sext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::SEXTLOAD, ISD::SIGN_EXTEND))
    return foldedExt;

  // Try to simplify (sext (masked_load x)).
  if (SDValue foldedExt =
      tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
                               ISD::SIGN_EXTEND))
    return foldedExt;

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // Try to simplify (sext (sextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
    return foldedExt;

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  // NOTE(review): unlike CombineZExtLogicopShiftLoad's zext path, this guard
  // requires pre-legalization AND a legal logic op — confirm that asymmetry
  // is intended.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
      LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
      SmallVector<SDNode*, 4> SetCCs;
      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                             ISD::SIGN_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        // Sign-extend the logic op's constant to the wide type.
        APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          // Other users of the narrow load get a truncate of the wide load.
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N0.getOperand(0).getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // If we already have the desired type, don't change it.
      if (SVT != N0.getValueType()) {
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        if (VT.getSizeInBits() == SVT.getSizeInBits())
          return DAG.getSetCC(DL, VT, N00, N01, CC);

        // If the desired elements are smaller or larger than the source
        // elements, we can use a matching integer vector type and then
        // truncate/sign extend.
        EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
        if (SVT == MatchingVecType) {
          SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
          return DAG.getSExtOrTrunc(VsetCC, DL, VT);
        }
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1)
                             ? DAG.getAllOnesConstant(DL, VT)
                             : DAG.getBoolConstant(true, DL, VT, N00VT);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
      if (SetCCVT.getScalarSizeInBits() != 1 &&
          (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Eliminate this sign extend by doing a negation in the destination type:
  // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
      isNullOrNullSplat(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
      TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
  }
  // Eliminate this sign extend by doing a decrement in the destination type:
  // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
      isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
    return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
  }

  return SDValue();
}
10245 
10246 // isTruncateOf - If N is a truncate of some other value, return true, record
10247 // the value being truncated in Op and which of Op's bits are zero/one in Known.
10248 // This function computes KnownBits to avoid a duplicated call to
10249 // computeKnownBits in the caller.
10250 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
10251                          KnownBits &Known) {
10252   if (N->getOpcode() == ISD::TRUNCATE) {
10253     Op = N->getOperand(0);
10254     Known = DAG.computeKnownBits(Op);
10255     return true;
10256   }
10257 
10258   if (N.getOpcode() != ISD::SETCC ||
10259       N.getValueType().getScalarType() != MVT::i1 ||
10260       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
10261     return false;
10262 
10263   SDValue Op0 = N->getOperand(0);
10264   SDValue Op1 = N->getOperand(1);
10265   assert(Op0.getValueType() == Op1.getValueType());
10266 
10267   if (isNullOrNullSplat(Op0))
10268     Op = Op1;
10269   else if (isNullOrNullSplat(Op1))
10270     Op = Op0;
10271   else
10272     return false;
10273 
10274   Known = DAG.computeKnownBits(Op);
10275 
10276   return (Known.Zero | 1).isAllOnesValue();
10277 }
10278 
10279 /// Given an extending node with a pop-count operand, if the target does not
10280 /// support a pop-count in the narrow source type but does support it in the
10281 /// destination type, widen the pop-count to the destination type.
10282 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
10283   assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
10284           Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
10285 
10286   SDValue CtPop = Extend->getOperand(0);
10287   if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
10288     return SDValue();
10289 
10290   EVT VT = Extend->getValueType(0);
10291   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10292   if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
10293       !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
10294     return SDValue();
10295 
10296   // zext (ctpop X) --> ctpop (zext X)
10297   SDLoc DL(Extend);
10298   SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
10299   return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
10300 }
10301 
/// Combine an ISD::ZERO_EXTEND node. Returns the replacement value, or an
/// empty SDValue when no fold applies. May also rewrite other nodes via
/// CombineTo and return SDValue(N, 0) so N is not revisited this round.
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Fold extensions of constants / constant build_vectors outright.
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  SDValue Op;
  KnownBits Known;
  if (isTruncateOf(DAG, N0, Op, Known)) {
    // TruncatedBits = the bits of Op that the truncate discards and that the
    // zext would re-introduce as zero. It is empty when Op and N0 have the
    // same scalar width (the setcc-as-truncate case from isTruncateOf).
    APInt TruncatedBits =
      (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
      APInt(Op.getScalarValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getScalarValueSizeInBits(),
                        N0.getScalarValueSizeInBits(),
                        std::min(Op.getScalarValueSizeInBits(),
                                 VT.getScalarSizeInBits()));
    // If those bits are already known zero, the trunc+zext pair is only a
    // width change, so extend or truncate Op directly.
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger mask,
    // possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        // Mask in the narrow source type, then extend the masked value.
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
        AddToWorklist(Op.getNode());
        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
        // Transfer the debug info; the new node is equivalent to N0.
        DAG.transferDbgValues(N0, ZExtOrTrunc);
        return ZExtOrTrunc;
      }
    }

    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      // Extend first, then clear the bits above MinVT with an in-register
      // zero-extension (an AND with a mask).
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
      // We may safely transfer the debug info describing the truncate node over
      // to the equivalent and operation.
      DAG.transferDbgValues(N0, And);
      return And;
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    // X is the wide, pre-truncate value; the AND mask is zero-extended to
    // match, which preserves the zext semantics.
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // Try to simplify (zext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
    return foldedExt;

  if (SDValue foldedExt =
      tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
                               ISD::ZERO_EXTEND))
    return foldedExt;

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        // With extra users, skip the transform when the AND+load would
        // already match as a zextload on its own.
        if (N0.getOpcode() == ISD::AND) {
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT;
          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
            DoXform = false;
        }
      }
      if (DoXform)
        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        // Build the wide zextload and redo the logic op in the wide type.
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          // Only the chain result of the old load needs updating.
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          // Other users of the old load value get a truncate of the new one.
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    return ZExtLoad;

  // Try to simplify (zext (zextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
    return foldedExt;

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      // NOTE(review): this bails out of visitZERO_EXTEND entirely (not just
      // this fold) when the setcc already has the target's preferred result
      // type — presumably intentional, as later folds would not apply.
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the sext'd result matches the element size of
      // the compare operands.
      SDLoc DL(N);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend followed by zext_in_reg.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
                                    N0.getValueType());
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      // KnownZeroBits = number of high bits guaranteed zero by the inner zext.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      // NOTE(review): this returns from the whole visitor (skipping the
      // remaining folds below) rather than just skipping this one.
      if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  if (SDValue NewCtPop = widenCtPop(N, DAG))
    return NewCtPop;

  return SDValue();
}
10554 
/// Combine an ISD::ANY_EXTEND node. Returns the replacement value, or an
/// empty SDValue when no fold applies. May also rewrite other nodes via
/// CombineTo and return SDValue(N, 0) so N is not revisited this round.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Fold extensions of constants / constant build_vectors outright.
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  // An any_extend of a truncate places no requirement on the high bits, so
  // the pair collapses to a single extend/truncate/no-op of x.
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    // X is the wide, pre-truncate value; extend the mask to match. The AND
    // result's low bits are the same either way, which is all aext promises.
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With extra users of the load, only transform when the other users can
    // be extended alongside (tracked in SetCCs).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
                                        TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        // Only the old load's chain result needs rewiring.
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
        recursivelyDeleteUnusedNodes(LN0);
      } else {
        // Other users of the old load value get a truncate of the new one.
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                    N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      // Re-issue the extending load directly at the wider result type,
      // keeping the original extension kind.
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      recursivelyDeleteUnusedNodes(LN0);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      // NOTE(review): this bails out of visitANY_EXTEND entirely (not just
      // this fold) when the setcc already has the target's preferred result
      // type — presumably intentional, as later folds would not apply.
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
        DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1),
                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  if (SDValue NewCtPop = widenCtPop(N, DAG))
    return NewCtPop;

  return SDValue();
}
10702 
10703 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
10704   unsigned Opcode = N->getOpcode();
10705   SDValue N0 = N->getOperand(0);
10706   SDValue N1 = N->getOperand(1);
10707   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
10708 
10709   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
10710   if (N0.getOpcode() == Opcode &&
10711       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
10712     return N0;
10713 
10714   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10715       N0.getOperand(0).getOpcode() == Opcode) {
10716     // We have an assert, truncate, assert sandwich. Make one stronger assert
10717     // by asserting on the smallest asserted type to the larger source type.
10718     // This eliminates the later assert:
10719     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
10720     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
10721     SDValue BigA = N0.getOperand(0);
10722     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10723     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10724            "Asserting zero/sign-extended bits to a type larger than the "
10725            "truncated destination does not provide information");
10726 
10727     SDLoc DL(N);
10728     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
10729     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
10730     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10731                                     BigA.getOperand(0), MinAssertVTVal);
10732     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10733   }
10734 
10735   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
10736   // than X. Just move the AssertZext in front of the truncate and drop the
10737   // AssertSExt.
10738   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10739       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
10740       Opcode == ISD::AssertZext) {
10741     SDValue BigA = N0.getOperand(0);
10742     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10743     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10744            "Asserting zero/sign-extended bits to a type larger than the "
10745            "truncated destination does not provide information");
10746 
10747     if (AssertVT.bitsLT(BigA_AssertVT)) {
10748       SDLoc DL(N);
10749       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10750                                       BigA.getOperand(0), N1);
10751       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10752     }
10753   }
10754 
10755   return SDValue();
10756 }
10757 
10758 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
10759   SDLoc DL(N);
10760 
10761   Align AL = cast<AssertAlignSDNode>(N)->getAlign();
10762   SDValue N0 = N->getOperand(0);
10763 
10764   // Fold (assertalign (assertalign x, AL0), AL1) ->
10765   // (assertalign x, max(AL0, AL1))
10766   if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
10767     return DAG.getAssertAlign(DL, N0.getOperand(0),
10768                               std::max(AL, AAN->getAlign()));
10769 
10770   // In rare cases, there are trivial arithmetic ops in source operands. Sink
10771   // this assert down to source operands so that those arithmetic ops could be
10772   // exposed to the DAG combining.
10773   switch (N0.getOpcode()) {
10774   default:
10775     break;
10776   case ISD::ADD:
10777   case ISD::SUB: {
10778     unsigned AlignShift = Log2(AL);
10779     SDValue LHS = N0.getOperand(0);
10780     SDValue RHS = N0.getOperand(1);
10781     unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
10782     unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
10783     if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
10784       if (LHSAlignShift < AlignShift)
10785         LHS = DAG.getAssertAlign(DL, LHS, AL);
10786       if (RHSAlignShift < AlignShift)
10787         RHS = DAG.getAssertAlign(DL, RHS, AL);
10788       return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
10789     }
10790     break;
10791   }
10792   }
10793 
10794   return SDValue();
10795 }
10796 
/// If the result of a wider load is shifted to right of N  bits and then
/// truncated to a narrower type and where N is a multiple of number of bits of
/// the narrower type, transform it to a narrower load from address + N / num of
/// bits of new type. Also narrow the load if the result is masked with an AND
/// to effectively produce a smaller type. If the result is to be extended, also
/// fold the extension to form a extending load.
///
/// \p N is the node being reduced: a TRUNCATE, SIGN_EXTEND_INREG, SRL, or AND.
/// Returns the replacement value, or an empty SDValue if no narrowing applies.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // ExtVT is the effective memory type of the narrowed load.
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // ShAmt is the bit offset (from the low end, adjusted later for big
  // endian) at which the narrowed value starts within the original load.
  unsigned ShAmt = 0;
  bool HasShiftedOffset = false;
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value,
    // or it maybe shifting a higher subword, half or byte into the lowest
    // bits.
    ExtType = ISD::ZEXTLOAD;
    // Treat the SRL node itself as the value being narrowed; the shift
    // handling below (the generic SRL block) consumes the shift amount.
    N0 = SDValue(N, 0);

    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01 || !LN0)
      return SDValue();

    uint64_t ShiftAmt = N01->getZExtValue();
    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
    // The narrowed type covers the bits above the shift amount, bounded by
    // the in-memory width (or the value width for sextloads / oversized
    // shifts).
    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    else
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getSizeInBits() - ShiftAmt);
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC)
      return SDValue();

    const APInt &Mask = AndC->getAPIntValue();
    unsigned ActiveBits = 0;
    if (Mask.isMask()) {
      // Low-bits mask: keep the bottom ActiveBits bits.
      ActiveBits = Mask.countTrailingOnes();
    } else if (Mask.isShiftedMask()) {
      // Shifted mask: the kept field starts ShAmt bits up, so the narrowed
      // load reads from an offset and the result must be shifted back left
      // at the end (see HasShiftedOffset handling below).
      ShAmt = Mask.countTrailingZeros();
      APInt ShiftedMask = Mask.lshr(ShAmt);
      ActiveBits = ShiftedMask.countTrailingOnes();
      HasShiftedOffset = true;
    } else
      return SDValue();

    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  // Look through an SRL feeding the operation (or the SRL itself in the
  // Opc == ISD::SRL case above, where N0 aliases N).
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    SDValue SRL = N0;
    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
      ShAmt = ConstShift->getZExtValue();
      unsigned EVTBits = ExtVT.getSizeInBits();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      // (If the shift amount was not a multiple, N0 is still the SRL and the
      // dyn_cast below fails.)
      auto *LN0 = dyn_cast<LoadSDNode>(N0);
      if (!LN0) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (LN0->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
        return SDValue();

      // If the SRL is only used by a masking AND, we may be able to adjust
      // the ExtVT to make the AND redundant.
      SDNode *Mask = *(SRL->use_begin());
      if (Mask->getOpcode() == ISD::AND &&
          isa<ConstantSDNode>(Mask->getOperand(1))) {
        const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
        if (ShiftMask.isMask()) {
          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
                                           ShiftMask.countTrailingOnes());
          // If the mask is smaller, recompute the type.
          if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
              TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
            ExtVT = MaskedVT;
        }
      }
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  // Reducing the width of a volatile load is illegal.  For atomics, we may be
  // able to reduce the width provided we never widen again. (see D66309)
  if (!LN0->isSimple() ||
      !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  // Mirror a little-endian bit offset into the equivalent big-endian offset
  // (measured in store bits from the other end of the loaded value).
  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    return LVTStoreBits - EVTStoreBits - ShAmt;
  };

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian())
    ShAmt = AdjustBigEndianShift(ShAmt);

  // Convert the bit offset into a byte offset from the original pointer.
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr =
      DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags);
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  if (HasShiftedOffset) {
    // Recalculate the shift amount after it has been altered to calculate
    // the offset.
    // NOTE(review): this re-applies AdjustBigEndianShift to the
    // already-adjusted ShAmt, which maps it back to the little-endian bit
    // offset — presumably the intent; confirm against big-endian tests.
    if (DAG.getDataLayout().isBigEndian())
      ShAmt = AdjustBigEndianShift(ShAmt);

    // We're using a shifted mask, so the load now has an offset. This means
    // that data has been loaded into the lower bytes than it would have been
    // before, so we need to shl the loaded data into the correct position in the
    // register.
    SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
    Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
  }

  // Return the new loaded value.
  return Result;
}
11003 
11004 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
11005   SDValue N0 = N->getOperand(0);
11006   SDValue N1 = N->getOperand(1);
11007   EVT VT = N->getValueType(0);
11008   EVT ExtVT = cast<VTSDNode>(N1)->getVT();
11009   unsigned VTBits = VT.getScalarSizeInBits();
11010   unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
11011 
11012   // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
11013   if (N0.isUndef())
11014     return DAG.getConstant(0, SDLoc(N), VT);
11015 
11016   // fold (sext_in_reg c1) -> c1
11017   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
11018     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
11019 
11020   // If the input is already sign extended, just drop the extension.
11021   if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
11022     return N0;
11023 
11024   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
11025   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
11026       ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
11027     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
11028                        N1);
11029 
11030   // fold (sext_in_reg (sext x)) -> (sext x)
11031   // fold (sext_in_reg (aext x)) -> (sext x)
11032   // if x is small enough or if we know that x has more than 1 sign bit and the
11033   // sign_extend_inreg is extending from one of them.
11034   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
11035     SDValue N00 = N0.getOperand(0);
11036     unsigned N00Bits = N00.getScalarValueSizeInBits();
11037     if ((N00Bits <= ExtVTBits ||
11038          (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
11039         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11040       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
11041   }
11042 
11043   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
11044   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
11045        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
11046        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
11047       N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) {
11048     if (!LegalOperations ||
11049         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
11050       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
11051                          N0.getOperand(0));
11052   }
11053 
11054   // fold (sext_in_reg (zext x)) -> (sext x)
11055   // iff we are extending the source sign bit.
11056   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
11057     SDValue N00 = N0.getOperand(0);
11058     if (N00.getScalarValueSizeInBits() == ExtVTBits &&
11059         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11060       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
11061   }
11062 
11063   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
11064   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
11065     return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
11066 
11067   // fold operands of sext_in_reg based on knowledge that the top bits are not
11068   // demanded.
11069   if (SimplifyDemandedBits(SDValue(N, 0)))
11070     return SDValue(N, 0);
11071 
11072   // fold (sext_in_reg (load x)) -> (smaller sextload x)
11073   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
11074   if (SDValue NarrowLoad = ReduceLoadWidth(N))
11075     return NarrowLoad;
11076 
11077   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
11078   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
11079   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
11080   if (N0.getOpcode() == ISD::SRL) {
11081     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
11082       if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
11083         // We can turn this into an SRA iff the input to the SRL is already sign
11084         // extended enough.
11085         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
11086         if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
11087           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
11088                              N0.getOperand(1));
11089       }
11090   }
11091 
11092   // fold (sext_inreg (extload x)) -> (sextload x)
11093   // If sextload is not supported by target, we can only do the combine when
11094   // load has one use. Doing otherwise can block folding the extload with other
11095   // extends that the target does support.
11096   if (ISD::isEXTLoad(N0.getNode()) &&
11097       ISD::isUNINDEXEDLoad(N0.getNode()) &&
11098       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11099       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
11100         N0.hasOneUse()) ||
11101        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11102     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11103     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11104                                      LN0->getChain(),
11105                                      LN0->getBasePtr(), ExtVT,
11106                                      LN0->getMemOperand());
11107     CombineTo(N, ExtLoad);
11108     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11109     AddToWorklist(ExtLoad.getNode());
11110     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11111   }
11112   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
11113   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
11114       N0.hasOneUse() &&
11115       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11116       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
11117        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11118     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11119     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11120                                      LN0->getChain(),
11121                                      LN0->getBasePtr(), ExtVT,
11122                                      LN0->getMemOperand());
11123     CombineTo(N, ExtLoad);
11124     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11125     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11126   }
11127 
11128   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
11129   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
11130     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
11131                                            N0.getOperand(1), false))
11132       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
11133   }
11134 
11135   return SDValue();
11136 }
11137 
11138 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
11139   SDValue N0 = N->getOperand(0);
11140   EVT VT = N->getValueType(0);
11141 
11142   // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
11143   if (N0.isUndef())
11144     return DAG.getConstant(0, SDLoc(N), VT);
11145 
11146   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11147     return Res;
11148 
11149   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11150     return SDValue(N, 0);
11151 
11152   return SDValue();
11153 }
11154 
11155 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
11156   SDValue N0 = N->getOperand(0);
11157   EVT VT = N->getValueType(0);
11158 
11159   // zext_vector_inreg(undef) = 0 because the top bits will be zero.
11160   if (N0.isUndef())
11161     return DAG.getConstant(0, SDLoc(N), VT);
11162 
11163   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11164     return Res;
11165 
11166   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11167     return SDValue(N, 0);
11168 
11169   return SDValue();
11170 }
11171 
/// Combine a TRUNCATE node. The folds below are ordered; each one that fires
/// returns immediately, so later folds only see inputs the earlier ones
/// declined.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (SrcVT == VT)
    return N0;

  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));

  // fold (truncate c1) -> c1
  // getNode constant-folds; only return if it produced a different node.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
    if (C.getNode() != N)
      return C;
  }

  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, than we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Reinterpret the source vector with SizeRatio-times more, narrower
    // elements; total bit size must be unchanged.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      // The low sub-element of the wide element holds the truncated value;
      // its index within the narrow vector depends on endianness.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getVectorIdxConstant(Index, DL));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known = DAG.computeKnownBits(Amt);
    unsigned Size = VT.getScalarSizeInBits();
    // Known leading zeros bound the shift amount; only fold if it provably
    // fits in the narrow type (< Size).
    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
      SDLoc SL(N);
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
        AddToWorklist(Amt.getNode());
      }
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    }
  }

  // Attempt to pre-truncate BUILD_VECTOR sources.
  if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
      // Avoid creating illegal types if running after type legalizer.
      (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
    SDLoc DL(N);
    EVT SVT = VT.getScalarType();
    SmallVector<SDValue, 8> TruncOps;
    for (const SDValue &Op : N0->op_values()) {
      SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
      TruncOps.push_back(TruncOp);
    }
    return DAG.getBuildVector(VT, DL, TruncOps);
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {
    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset-th operand; those carry the low parts the
      // truncate would have produced.
      // NOTE(review): selecting operand 0 of each group assumes the
      // little-endian part ordering — confirm big-endian targets don't reach
      // this AfterLegalizeVectorOps fold with a mismatched layout.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    APInt Mask =
        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (LN0->isSimple() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        // Same memory type, narrower result type: re-issue the ext load at
        // the truncated width and forward the chain.
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    // Scan the concat operands, remembering the single non-undef one (if
    // any) and collecting the truncated type of every operand.
    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;

      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorElementCount()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT VecSrcVT = VecSrc.getValueType();
    if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
      SDLoc SL(N);

      // The low-order element sits at index 0 on little-endian targets and
      // at the last index on big-endian targets.
      unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
                         DAG.getVectorIdxConstant(Idx, SL));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
  // When the adde's carry is not used.
  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      // We only do for addcarry before legalize operation
      ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
       TLI.isOperationLegal(N0.getOpcode(), VT))) {
    SDLoc SL(N);
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
  }

  // fold (truncate (extract_subvector(ext x))) ->
  //      (extract_subvector x)
  // TODO: This can be generalized to cover cases where the truncate and extract
  // do not fully cancel each other out.
  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::SIGN_EXTEND ||
        N00.getOpcode() == ISD::ZERO_EXTEND ||
        N00.getOpcode() == ISD::ANY_EXTEND) {
      if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
          VT.getVectorElementType())
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
                           N00.getOperand(0), N0.getOperand(1));
    }
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Narrow a suitable binary operation with a non-opaque constant operand by
  // moving it ahead of the truncate. This is limited to pre-legalization
  // because targets may prefer a wider type during later combines and invert
  // this transform.
  // All listed opcodes share the single handler below; the switch has no
  // default case and intentionally falls through to the final return.
  switch (N0.getOpcode()) {
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    if (!LegalOperations && N0.hasOneUse() &&
        (isConstantOrConstantVector(N0.getOperand(0), true) ||
         isConstantOrConstantVector(N0.getOperand(1), true))) {
      // TODO: We already restricted this to pre-legalization, but for vectors
      // we are extra cautious to not create an unsupported operation.
      // Target-specific changes are likely needed to avoid regressions here.
      if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
        SDLoc DL(N);
        SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
        SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
        return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
      }
    }
  }

  return SDValue();
}
11485 
11486 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
11487   SDValue Elt = N->getOperand(i);
11488   if (Elt.getOpcode() != ISD::MERGE_VALUES)
11489     return Elt.getNode();
11490   return Elt.getOperand(Elt.getResNo()).getNode();
11491 }
11492 
/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  // Look through MERGE_VALUES to the underlying loads, if any.
  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

  // A BUILD_PAIR is always having the least significant part in elt 0 and the
  // most significant part in elt 1. So when combining into one large load, we
  // need to consider the endianness.
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LD1, LD2);

  // From here on, LD1 is the load expected at the lower address; its base
  // pointer becomes the combined load's address below.
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getStoreSize();
  // LD2 must be a plain single-use load starting exactly LD1Bytes after LD1.
  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    Align Alignment = LD1->getAlign();
    Align NewAlign = DAG.getDataLayout().getABITypeAlign(
        VT.getTypeForEVT(*DAG.getContext()));

    // Only combine when the wide type's ABI alignment is already satisfied
    // by LD1's alignment, and the wide load is legal (if past legalization).
    if (NewAlign <= Alignment &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                         LD1->getPointerInfo(), Alignment);
  }

  return SDValue();
}
11526 
11527 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
11528   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
11529   // and Lo parts; on big-endian machines it doesn't.
11530   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
11531 }
11532 
/// Fold a bitcast of integer sign-bit logic on a bitcasted FP value back into
/// native FP ops:
///   bitcast (and (bitcast X), ~SignMask) -> fabs X
///   bitcast (xor (bitcast X),  SignMask) -> fneg X
///   bitcast (or  (bitcast X),  SignMask) -> fneg (fabs X)
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  // If this is not a bitcast to an FP type or if the target doesn't have
  // IEEE754-compliant FP logic, we're done.
  EVT VT = N->getValueType(0);
  if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
    return SDValue();

  // TODO: Handle cases where the integer constant is a different scalar
  // bitwidth to the FP.
  SDValue N0 = N->getOperand(0);
  EVT SourceVT = N0.getValueType();
  if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
    return SDValue();

  // Pick the FP opcode and the exact integer mask the logic op must use for
  // the fold to be valid.
  unsigned FPOpcode;
  APInt SignMask;
  switch (N0.getOpcode()) {
  case ISD::AND:
    // Clearing only the sign bit is fabs.
    FPOpcode = ISD::FABS;
    SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  case ISD::XOR:
    // Flipping only the sign bit is fneg.
    FPOpcode = ISD::FNEG;
    SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  case ISD::OR:
    // Setting the sign bit is fneg(fabs); FPOpcode holds the FABS, the FNEG
    // is wrapped around it below.
    FPOpcode = ISD::FABS;
    SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  default:
    return SDValue();
  }

  // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
  // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
  // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
  //   fneg (fabs X)
  SDValue LogicOp0 = N0.getOperand(0);
  ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
  if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
      LogicOp0.getOpcode() == ISD::BITCAST &&
      LogicOp0.getOperand(0).getValueType() == VT) {
    SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
    // Statistic counter for converted FP logic ops.
    NumFPLogicOpsConv++;
    if (N0.getOpcode() == ISD::OR)
      return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
    return FPOp;
  }

  return SDValue();
}
11585 
11586 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
11587   SDValue N0 = N->getOperand(0);
11588   EVT VT = N->getValueType(0);
11589 
11590   if (N0.isUndef())
11591     return DAG.getUNDEF(VT);
11592 
11593   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
11594   // Only do this before legalize types, unless both types are integer and the
11595   // scalar type is legal. Only do this before legalize ops, since the target
11596   // maybe depending on the bitcast.
11597   // First check to see if this is all constant.
11598   // TODO: Support FP bitcasts after legalize types.
11599   if (VT.isVector() &&
11600       (!LegalTypes ||
11601        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
11602         TLI.isTypeLegal(VT.getVectorElementType()))) &&
11603       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
11604       cast<BuildVectorSDNode>(N0)->isConstant())
11605     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
11606                                              VT.getVectorElementType());
11607 
11608   // If the input is a constant, let getNode fold it.
11609   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
11610     // If we can't allow illegal operations, we need to check that this is just
11611     // a fp -> int or int -> conversion and that the resulting operation will
11612     // be legal.
11613     if (!LegalOperations ||
11614         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
11615          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
11616         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
11617          TLI.isOperationLegal(ISD::Constant, VT))) {
11618       SDValue C = DAG.getBitcast(VT, N0);
11619       if (C.getNode() != N)
11620         return C;
11621     }
11622   }
11623 
11624   // (conv (conv x, t1), t2) -> (conv x, t2)
11625   if (N0.getOpcode() == ISD::BITCAST)
11626     return DAG.getBitcast(VT, N0.getOperand(0));
11627 
11628   // fold (conv (load x)) -> (load (conv*)x)
11629   // If the resultant load doesn't need a higher alignment than the original!
11630   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
11631       // Do not remove the cast if the types differ in endian layout.
11632       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
11633           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
11634       // If the load is volatile, we only want to change the load type if the
11635       // resulting load is legal. Otherwise we might increase the number of
11636       // memory accesses. We don't care if the original type was legal or not
11637       // as we assume software couldn't rely on the number of accesses of an
11638       // illegal type.
11639       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
11640        TLI.isOperationLegal(ISD::LOAD, VT))) {
11641     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11642 
11643     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
11644                                     *LN0->getMemOperand())) {
11645       SDValue Load =
11646           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
11647                       LN0->getPointerInfo(), LN0->getAlignment(),
11648                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11649       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11650       return Load;
11651     }
11652   }
11653 
11654   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
11655     return V;
11656 
11657   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
11658   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
11659   //
11660   // For ppc_fp128:
11661   // fold (bitcast (fneg x)) ->
11662   //     flipbit = signbit
11663   //     (xor (bitcast x) (build_pair flipbit, flipbit))
11664   //
11665   // fold (bitcast (fabs x)) ->
11666   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
11667   //     (xor (bitcast x) (build_pair flipbit, flipbit))
11668   // This often reduces constant pool loads.
11669   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
11670        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
11671       N0.getNode()->hasOneUse() && VT.isInteger() &&
11672       !VT.isVector() && !N0.getValueType().isVector()) {
11673     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
11674     AddToWorklist(NewConv.getNode());
11675 
11676     SDLoc DL(N);
11677     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11678       assert(VT.getSizeInBits() == 128);
11679       SDValue SignBit = DAG.getConstant(
11680           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
11681       SDValue FlipBit;
11682       if (N0.getOpcode() == ISD::FNEG) {
11683         FlipBit = SignBit;
11684         AddToWorklist(FlipBit.getNode());
11685       } else {
11686         assert(N0.getOpcode() == ISD::FABS);
11687         SDValue Hi =
11688             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
11689                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
11690                                               SDLoc(NewConv)));
11691         AddToWorklist(Hi.getNode());
11692         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
11693         AddToWorklist(FlipBit.getNode());
11694       }
11695       SDValue FlipBits =
11696           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11697       AddToWorklist(FlipBits.getNode());
11698       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
11699     }
11700     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11701     if (N0.getOpcode() == ISD::FNEG)
11702       return DAG.getNode(ISD::XOR, DL, VT,
11703                          NewConv, DAG.getConstant(SignBit, DL, VT));
11704     assert(N0.getOpcode() == ISD::FABS);
11705     return DAG.getNode(ISD::AND, DL, VT,
11706                        NewConv, DAG.getConstant(~SignBit, DL, VT));
11707   }
11708 
11709   // fold (bitconvert (fcopysign cst, x)) ->
11710   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
11711   // Note that we don't handle (copysign x, cst) because this can always be
11712   // folded to an fneg or fabs.
11713   //
11714   // For ppc_fp128:
11715   // fold (bitcast (fcopysign cst, x)) ->
11716   //     flipbit = (and (extract_element
11717   //                     (xor (bitcast cst), (bitcast x)), 0),
11718   //                    signbit)
11719   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
11720   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
11721       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
11722       VT.isInteger() && !VT.isVector()) {
11723     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
11724     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
11725     if (isTypeLegal(IntXVT)) {
11726       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
11727       AddToWorklist(X.getNode());
11728 
11729       // If X has a different width than the result/lhs, sext it or truncate it.
11730       unsigned VTWidth = VT.getSizeInBits();
11731       if (OrigXWidth < VTWidth) {
11732         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
11733         AddToWorklist(X.getNode());
11734       } else if (OrigXWidth > VTWidth) {
11735         // To get the sign bit in the right place, we have to shift it right
11736         // before truncating.
11737         SDLoc DL(X);
11738         X = DAG.getNode(ISD::SRL, DL,
11739                         X.getValueType(), X,
11740                         DAG.getConstant(OrigXWidth-VTWidth, DL,
11741                                         X.getValueType()));
11742         AddToWorklist(X.getNode());
11743         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
11744         AddToWorklist(X.getNode());
11745       }
11746 
11747       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11748         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
11749         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11750         AddToWorklist(Cst.getNode());
11751         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
11752         AddToWorklist(X.getNode());
11753         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
11754         AddToWorklist(XorResult.getNode());
11755         SDValue XorResult64 = DAG.getNode(
11756             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
11757             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
11758                                   SDLoc(XorResult)));
11759         AddToWorklist(XorResult64.getNode());
11760         SDValue FlipBit =
11761             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
11762                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
11763         AddToWorklist(FlipBit.getNode());
11764         SDValue FlipBits =
11765             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11766         AddToWorklist(FlipBits.getNode());
11767         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
11768       }
11769       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11770       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
11771                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
11772       AddToWorklist(X.getNode());
11773 
11774       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11775       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
11776                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
11777       AddToWorklist(Cst.getNode());
11778 
11779       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
11780     }
11781   }
11782 
11783   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
11784   if (N0.getOpcode() == ISD::BUILD_PAIR)
11785     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
11786       return CombineLD;
11787 
11788   // Remove double bitcasts from shuffles - this is often a legacy of
11789   // XformToShuffleWithZero being used to combine bitmaskings (of
11790   // float vectors bitcast to integer vectors) into shuffles.
11791   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
11792   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
11793       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
11794       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
11795       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
11796     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
11797 
11798     // If operands are a bitcast, peek through if it casts the original VT.
11799     // If operands are a constant, just bitcast back to original VT.
11800     auto PeekThroughBitcast = [&](SDValue Op) {
11801       if (Op.getOpcode() == ISD::BITCAST &&
11802           Op.getOperand(0).getValueType() == VT)
11803         return SDValue(Op.getOperand(0));
11804       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
11805           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
11806         return DAG.getBitcast(VT, Op);
11807       return SDValue();
11808     };
11809 
11810     // FIXME: If either input vector is bitcast, try to convert the shuffle to
11811     // the result type of this bitcast. This would eliminate at least one
11812     // bitcast. See the transform in InstCombine.
11813     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
11814     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
11815     if (!(SV0 && SV1))
11816       return SDValue();
11817 
11818     int MaskScale =
11819         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
11820     SmallVector<int, 8> NewMask;
11821     for (int M : SVN->getMask())
11822       for (int i = 0; i != MaskScale; ++i)
11823         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
11824 
11825     SDValue LegalShuffle =
11826         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
11827     if (LegalShuffle)
11828       return LegalShuffle;
11829   }
11830 
11831   return SDValue();
11832 }
11833 
11834 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
11835   EVT VT = N->getValueType(0);
11836   return CombineConsecutiveLoads(N, VT);
11837 }
11838 
11839 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
11840   SDValue N0 = N->getOperand(0);
11841 
11842   // (freeze (freeze x)) -> (freeze x)
11843   if (N0.getOpcode() == ISD::FREEZE)
11844     return N0;
11845 
11846   // If the input is a constant, return it.
11847   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0))
11848     return N0;
11849 
11850   return SDValue();
11851 }
11852 
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
/// Constant-folds the bitcast of the build_vector by reinterpreting the
/// constant bits: handles same-size element conversion (FP<->int), element
/// growing (packing several source elements into one destination element),
/// and element shrinking (splitting one source element into several), with
/// endian-aware placement of the pieces.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    // Same-size conversion always succeeds (takes the SrcBitSize == DstBitSize
    // path above), so the result node is non-null here.
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  SDLoc DL(BV);

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    // Each destination element is assembled from NumInputsPerOutput
    // consecutive source elements.
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = DAG.getDataLayout().isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        // On little-endian targets the first source element occupies the
        // low bits of the output, so walk the group in reverse.
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.isUndef()) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      // Only produce UNDEF if every contributing source element was undef;
      // otherwise undef pieces were folded in as zero bits.
      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (const SDValue &Op : BV->op_values()) {
    if (Op.isUndef()) {
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(Op)->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    // Peel off DstBitSize-sized pieces from the low end of the value.
    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
      OpVal.lshrInPlace(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (DAG.getDataLayout().isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getBuildVector(VT, DL, Ops);
}
11967 
11968 static bool isContractable(SDNode *N) {
11969   SDNodeFlags F = N->getFlags();
11970   return F.hasAllowContract() || F.hasAllowReassociation();
11971 }
11972 
/// Try to perform FMA combining on a given FADD node.
///
/// Attempts to fold (fadd (fmul x, y), z) and related patterns into a fused
/// multiply-add (FMA or FMAD) when the target supports it and the FP flags
/// (or global options) permit contraction. Also looks through FP_EXTEND and
/// through existing FMA nodes for further fusion when the target enables
/// aggressive FMA fusion. The folds are tried in priority order; the first
/// matching pattern wins. Returns the fused node, or an empty SDValue if no
/// fold applies.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  SDNodeFlags Flags = N->getFlags();
  // Contraction is allowed either per-node (contract/reassoc flags) or
  // globally (unsafe-fp-math).
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool CanReassociate =
      Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Defer to the machine combiner when the target prefers forming FMAs there.
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1, Flags);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0, Flags);
  }

  // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
  // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
  // This requires reassociation because it changes the order of operations.
  SDValue FMA, E;
  if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
      N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
      N0.getOperand(2).hasOneUse()) {
    FMA = N0;
    E = N1;
  } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
             N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
             N1.getOperand(2).hasOneUse()) {
    FMA = N1;
    E = N0;
  }
  if (FMA && E) {
    SDValue A = FMA.getOperand(0);
    SDValue B = FMA.getOperand(1);
    SDValue C = FMA.getOperand(2).getOperand(0);
    SDValue D = FMA.getOperand(2).getOperand(1);
    SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E, Flags);
    return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE, Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                            N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)), N1, Flags);
    }
  }

  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                            N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)), N0, Flags);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), z))
    auto FoldFAddFMAFPExtFMul = [&] (
      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
      SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N020.getValueType())) {
          return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                      N020.getOperand(0), N020.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    auto FoldFAddFPExtFMAFMul = [&] (
      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
      SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N00.getValueType())) {
          return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                      N002.getOperand(0), N002.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
    //   -> (fma y, z, (fma (fpext u), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode) {
      SDValue N12 = N1.getOperand(2);
      if (N12.getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N12.getOperand(0);
        if (isContractableFMUL(N120) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N120.getValueType())) {
          return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                      N120.getOperand(0), N120.getOperand(1),
                                      N0, Flags);
        }
      }
    }

    // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
    //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == PreferredFusedOpcode) {
        SDValue N102 = N10.getOperand(2);
        if (isContractableFMUL(N102) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N10.getValueType())) {
          return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                      N102.getOperand(0), N102.getOperand(1),
                                      N0, Flags);
        }
      }
    }
  }

  return SDValue();
}
12188 
12189 /// Try to perform FMA combining on a given FSUB node.
12190 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
12191   SDValue N0 = N->getOperand(0);
12192   SDValue N1 = N->getOperand(1);
12193   EVT VT = N->getValueType(0);
12194   SDLoc SL(N);
12195 
12196   const TargetOptions &Options = DAG.getTarget().Options;
12197   // Floating-point multiply-add with intermediate rounding.
12198   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
12199 
12200   // Floating-point multiply-add without intermediate rounding.
12201   bool HasFMA =
12202       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12203       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12204 
12205   // No valid opcode, do not combine.
12206   if (!HasFMAD && !HasFMA)
12207     return SDValue();
12208 
12209   const SDNodeFlags Flags = N->getFlags();
12210   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
12211   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
12212                               CanFuse || HasFMAD);
12213 
12214   // If the subtraction is not contractable, do not combine.
12215   if (!AllowFusionGlobally && !isContractable(N))
12216     return SDValue();
12217 
12218   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
12219     return SDValue();
12220 
12221   // Always prefer FMAD to FMA for precision.
12222   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12223   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12224   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
12225 
12226   // Is the node an FMUL and contractable either due to global flags or
12227   // SDNodeFlags.
12228   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12229     if (N.getOpcode() != ISD::FMUL)
12230       return false;
12231     return AllowFusionGlobally || isContractable(N.getNode());
12232   };
12233 
12234   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
12235   auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
12236     if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
12237       return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
12238                          XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z),
12239                          Flags);
12240     }
12241     return SDValue();
12242   };
12243 
12244   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
12245   // Note: Commutes FSUB operands.
12246   auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
12247     if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
12248       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12249                          DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
12250                          YZ.getOperand(1), X, Flags);
12251     }
12252     return SDValue();
12253   };
12254 
12255   // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
12256   // prefer to fold the multiply with fewer uses.
12257   if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
12258       (N0.getNode()->use_size() > N1.getNode()->use_size())) {
12259     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
12260     if (SDValue V = tryToFoldXSubYZ(N0, N1))
12261       return V;
12262     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
12263     if (SDValue V = tryToFoldXYSubZ(N0, N1))
12264       return V;
12265   } else {
12266     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
12267     if (SDValue V = tryToFoldXYSubZ(N0, N1))
12268       return V;
12269     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
12270     if (SDValue V = tryToFoldXSubYZ(N0, N1))
12271       return V;
12272   }
12273 
12274   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
12275   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
12276       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
12277     SDValue N00 = N0.getOperand(0).getOperand(0);
12278     SDValue N01 = N0.getOperand(0).getOperand(1);
12279     return DAG.getNode(PreferredFusedOpcode, SL, VT,
12280                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
12281                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
12282   }
12283 
12284   // Look through FP_EXTEND nodes to do more combining.
12285 
12286   // fold (fsub (fpext (fmul x, y)), z)
12287   //   -> (fma (fpext x), (fpext y), (fneg z))
12288   if (N0.getOpcode() == ISD::FP_EXTEND) {
12289     SDValue N00 = N0.getOperand(0);
12290     if (isContractableFMUL(N00) &&
12291         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12292                             N00.getValueType())) {
12293       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12294                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12295                                      N00.getOperand(0)),
12296                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12297                                      N00.getOperand(1)),
12298                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
12299     }
12300   }
12301 
12302   // fold (fsub x, (fpext (fmul y, z)))
12303   //   -> (fma (fneg (fpext y)), (fpext z), x)
12304   // Note: Commutes FSUB operands.
12305   if (N1.getOpcode() == ISD::FP_EXTEND) {
12306     SDValue N10 = N1.getOperand(0);
12307     if (isContractableFMUL(N10) &&
12308         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12309                             N10.getValueType())) {
12310       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12311                          DAG.getNode(ISD::FNEG, SL, VT,
12312                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
12313                                                  N10.getOperand(0))),
12314                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12315                                      N10.getOperand(1)),
12316                          N0, Flags);
12317     }
12318   }
12319 
12320   // fold (fsub (fpext (fneg (fmul, x, y))), z)
12321   //   -> (fneg (fma (fpext x), (fpext y), z))
12322   // Note: This could be removed with appropriate canonicalization of the
12323   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
12324   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
12325   // from implementing the canonicalization in visitFSUB.
12326   if (N0.getOpcode() == ISD::FP_EXTEND) {
12327     SDValue N00 = N0.getOperand(0);
12328     if (N00.getOpcode() == ISD::FNEG) {
12329       SDValue N000 = N00.getOperand(0);
12330       if (isContractableFMUL(N000) &&
12331           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12332                               N00.getValueType())) {
12333         return DAG.getNode(ISD::FNEG, SL, VT,
12334                            DAG.getNode(PreferredFusedOpcode, SL, VT,
12335                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
12336                                                    N000.getOperand(0)),
12337                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
12338                                                    N000.getOperand(1)),
12339                                        N1, Flags));
12340       }
12341     }
12342   }
12343 
12344   // fold (fsub (fneg (fpext (fmul, x, y))), z)
12345   //   -> (fneg (fma (fpext x)), (fpext y), z)
12346   // Note: This could be removed with appropriate canonicalization of the
12347   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
12348   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
12349   // from implementing the canonicalization in visitFSUB.
12350   if (N0.getOpcode() == ISD::FNEG) {
12351     SDValue N00 = N0.getOperand(0);
12352     if (N00.getOpcode() == ISD::FP_EXTEND) {
12353       SDValue N000 = N00.getOperand(0);
12354       if (isContractableFMUL(N000) &&
12355           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12356                               N000.getValueType())) {
12357         return DAG.getNode(ISD::FNEG, SL, VT,
12358                            DAG.getNode(PreferredFusedOpcode, SL, VT,
12359                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
12360                                                    N000.getOperand(0)),
12361                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
12362                                                    N000.getOperand(1)),
12363                                        N1, Flags));
12364       }
12365     }
12366   }
12367 
12368   // More folding opportunities when target permits.
12369   if (Aggressive) {
12370     // fold (fsub (fma x, y, (fmul u, v)), z)
12371     //   -> (fma x, y (fma u, v, (fneg z)))
12372     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
12373         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
12374         N0.getOperand(2)->hasOneUse()) {
12375       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12376                          N0.getOperand(0), N0.getOperand(1),
12377                          DAG.getNode(PreferredFusedOpcode, SL, VT,
12378                                      N0.getOperand(2).getOperand(0),
12379                                      N0.getOperand(2).getOperand(1),
12380                                      DAG.getNode(ISD::FNEG, SL, VT,
12381                                                  N1), Flags), Flags);
12382     }
12383 
12384     // fold (fsub x, (fma y, z, (fmul u, v)))
12385     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
12386     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
12387         isContractableFMUL(N1.getOperand(2)) &&
12388         N1->hasOneUse() && NoSignedZero) {
12389       SDValue N20 = N1.getOperand(2).getOperand(0);
12390       SDValue N21 = N1.getOperand(2).getOperand(1);
12391       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12392                          DAG.getNode(ISD::FNEG, SL, VT,
12393                                      N1.getOperand(0)),
12394                          N1.getOperand(1),
12395                          DAG.getNode(PreferredFusedOpcode, SL, VT,
12396                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
12397                                      N21, N0, Flags), Flags);
12398     }
12399 
12400 
12401     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
12402     //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
12403     if (N0.getOpcode() == PreferredFusedOpcode &&
12404         N0->hasOneUse()) {
12405       SDValue N02 = N0.getOperand(2);
12406       if (N02.getOpcode() == ISD::FP_EXTEND) {
12407         SDValue N020 = N02.getOperand(0);
12408         if (isContractableFMUL(N020) &&
12409             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12410                                 N020.getValueType())) {
12411           return DAG.getNode(PreferredFusedOpcode, SL, VT,
12412                              N0.getOperand(0), N0.getOperand(1),
12413                              DAG.getNode(PreferredFusedOpcode, SL, VT,
12414                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12415                                                      N020.getOperand(0)),
12416                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12417                                                      N020.getOperand(1)),
12418                                          DAG.getNode(ISD::FNEG, SL, VT,
12419                                                      N1), Flags), Flags);
12420         }
12421       }
12422     }
12423 
12424     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
12425     //   -> (fma (fpext x), (fpext y),
12426     //           (fma (fpext u), (fpext v), (fneg z)))
12427     // FIXME: This turns two single-precision and one double-precision
12428     // operation into two double-precision operations, which might not be
12429     // interesting for all targets, especially GPUs.
12430     if (N0.getOpcode() == ISD::FP_EXTEND) {
12431       SDValue N00 = N0.getOperand(0);
12432       if (N00.getOpcode() == PreferredFusedOpcode) {
12433         SDValue N002 = N00.getOperand(2);
12434         if (isContractableFMUL(N002) &&
12435             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12436                                 N00.getValueType())) {
12437           return DAG.getNode(PreferredFusedOpcode, SL, VT,
12438                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
12439                                          N00.getOperand(0)),
12440                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
12441                                          N00.getOperand(1)),
12442                              DAG.getNode(PreferredFusedOpcode, SL, VT,
12443                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12444                                                      N002.getOperand(0)),
12445                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12446                                                      N002.getOperand(1)),
12447                                          DAG.getNode(ISD::FNEG, SL, VT,
12448                                                      N1), Flags), Flags);
12449         }
12450       }
12451     }
12452 
12453     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
12454     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
12455     if (N1.getOpcode() == PreferredFusedOpcode &&
12456         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
12457         N1->hasOneUse()) {
12458       SDValue N120 = N1.getOperand(2).getOperand(0);
12459       if (isContractableFMUL(N120) &&
12460           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12461                               N120.getValueType())) {
12462         SDValue N1200 = N120.getOperand(0);
12463         SDValue N1201 = N120.getOperand(1);
12464         return DAG.getNode(PreferredFusedOpcode, SL, VT,
12465                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
12466                            N1.getOperand(1),
12467                            DAG.getNode(PreferredFusedOpcode, SL, VT,
12468                                        DAG.getNode(ISD::FNEG, SL, VT,
12469                                                    DAG.getNode(ISD::FP_EXTEND, SL,
12470                                                                VT, N1200)),
12471                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
12472                                                    N1201),
12473                                        N0, Flags), Flags);
12474       }
12475     }
12476 
12477     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
12478     //   -> (fma (fneg (fpext y)), (fpext z),
12479     //           (fma (fneg (fpext u)), (fpext v), x))
12480     // FIXME: This turns two single-precision and one double-precision
12481     // operation into two double-precision operations, which might not be
12482     // interesting for all targets, especially GPUs.
12483     if (N1.getOpcode() == ISD::FP_EXTEND &&
12484         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
12485       SDValue CvtSrc = N1.getOperand(0);
12486       SDValue N100 = CvtSrc.getOperand(0);
12487       SDValue N101 = CvtSrc.getOperand(1);
12488       SDValue N102 = CvtSrc.getOperand(2);
12489       if (isContractableFMUL(N102) &&
12490           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12491                               CvtSrc.getValueType())) {
12492         SDValue N1020 = N102.getOperand(0);
12493         SDValue N1021 = N102.getOperand(1);
12494         return DAG.getNode(PreferredFusedOpcode, SL, VT,
12495                            DAG.getNode(ISD::FNEG, SL, VT,
12496                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
12497                                                    N100)),
12498                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
12499                            DAG.getNode(PreferredFusedOpcode, SL, VT,
12500                                        DAG.getNode(ISD::FNEG, SL, VT,
12501                                                    DAG.getNode(ISD::FP_EXTEND, SL,
12502                                                                VT, N1020)),
12503                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
12504                                                    N1021),
12505                                        N0, Flags), Flags);
12506       }
12507     }
12508   }
12509 
12510   return SDValue();
12511 }
12512 
12513 /// Try to perform FMA combining on a given FMUL node based on the distributive
12514 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
12515 /// subtraction instead of addition).
12516 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
12517   SDValue N0 = N->getOperand(0);
12518   SDValue N1 = N->getOperand(1);
12519   EVT VT = N->getValueType(0);
12520   SDLoc SL(N);
12521   const SDNodeFlags Flags = N->getFlags();
12522 
12523   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
12524 
12525   const TargetOptions &Options = DAG.getTarget().Options;
12526 
12527   // The transforms below are incorrect when x == 0 and y == inf, because the
12528   // intermediate multiplication produces a nan.
12529   if (!Options.NoInfsFPMath)
12530     return SDValue();
12531 
12532   // Floating-point multiply-add without intermediate rounding.
12533   bool HasFMA =
12534       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
12535       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12536       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12537 
12538   // Floating-point multiply-add with intermediate rounding. This can result
12539   // in a less precise result due to the changed rounding order.
12540   bool HasFMAD = Options.UnsafeFPMath &&
12541                  (LegalOperations && TLI.isFMADLegal(DAG, N));
12542 
12543   // No valid opcode, do not combine.
12544   if (!HasFMAD && !HasFMA)
12545     return SDValue();
12546 
12547   // Always prefer FMAD to FMA for precision.
12548   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12549   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12550 
12551   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
12552   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
12553   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
12554     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
12555       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
12556         if (C->isExactlyValue(+1.0))
12557           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12558                              Y, Flags);
12559         if (C->isExactlyValue(-1.0))
12560           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12561                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12562       }
12563     }
12564     return SDValue();
12565   };
12566 
12567   if (SDValue FMA = FuseFADD(N0, N1, Flags))
12568     return FMA;
12569   if (SDValue FMA = FuseFADD(N1, N0, Flags))
12570     return FMA;
12571 
12572   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
12573   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
12574   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
12575   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
12576   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
12577     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
12578       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
12579         if (C0->isExactlyValue(+1.0))
12580           return DAG.getNode(PreferredFusedOpcode, SL, VT,
12581                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
12582                              Y, Flags);
12583         if (C0->isExactlyValue(-1.0))
12584           return DAG.getNode(PreferredFusedOpcode, SL, VT,
12585                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
12586                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12587       }
12588       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
12589         if (C1->isExactlyValue(+1.0))
12590           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12591                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12592         if (C1->isExactlyValue(-1.0))
12593           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12594                              Y, Flags);
12595       }
12596     }
12597     return SDValue();
12598   };
12599 
12600   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
12601     return FMA;
12602   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
12603     return FMA;
12604 
12605   return SDValue();
12606 }
12607 
/// Fold an FADD node: constant folding and canonicalization, identity and
/// negation folds, and (under unsafe-math or reassoc+nsz flags) a family of
/// reassociations that turn repeated additions into multiplies. Folds are
/// tried in order and the first match returns; later folds rely on earlier
/// canonicalizations (e.g. constant moved to the RHS) not having fired.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // True when the operand is an FP constant or a build_vector of FP constants.
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // Generic FP binop simplifications shared with the other FP visitors.
  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);

  // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
  // NOTE(review): the boolean argument presumably allows undef lanes in a
  // splat constant -- confirm against the isConstOrConstSplatFP helper.
  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
  if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
      return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // Only when negating B is strictly cheaper than keeping it as-is.
  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
    if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
            N1, DAG, LegalOperations, ForCodeSize))
      return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1, Flags);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
    if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
            N0, DAG, LegalOperations, ForCodeSize))
      return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0, Flags);

  // Matches a one-use (fmul B, -2.0), where -2.0 may be a constant splat.
  auto isFMulNegTwo = [](SDValue FMul) {
    if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
      return false;
    auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
    return C && C->isExactlyValue(-2.0);
  };

  // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N0)) {
    SDValue B = N0.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
    return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
  }
  // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N1)) {
    SDValue B = N1.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
    return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
  }

  // No FP constant should be created after legalization as Instruction
  // Selection pass has a hard time dealing with FP constants.
  bool AllowNewConst = (Level < AfterLegalizeDAG);

  // If nnan is enabled, fold lots of things.
  if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);
  }

  // If 'unsafe math' or reassoc and nsz, fold lots of things.
  // TODO: break out portions of the transformations below for which Unsafe is
  //       considered and which do not require both nsz and reassoc
  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
      AllowNewConst) {
    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
    if (N1CFP && N0.getOpcode() == ISD::FADD &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
    }

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
        }
      }

      if (N0.getOpcode() == ISD::FADD) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N1, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      if (N1.getOpcode() == ISD::FADD) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N0, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
                           DAG.getConstantFP(4.0, DL, VT), Flags);
      }
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if (SDValue Fused = visitFADDForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }
  return SDValue();
}
12787 
12788 SDValue DAGCombiner::visitFSUB(SDNode *N) {
12789   SDValue N0 = N->getOperand(0);
12790   SDValue N1 = N->getOperand(1);
12791   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12792   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12793   EVT VT = N->getValueType(0);
12794   SDLoc DL(N);
12795   const TargetOptions &Options = DAG.getTarget().Options;
12796   const SDNodeFlags Flags = N->getFlags();
12797 
12798   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12799     return R;
12800 
12801   // fold vector ops
12802   if (VT.isVector())
12803     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12804       return FoldedVOp;
12805 
12806   // fold (fsub c1, c2) -> c1-c2
12807   if (N0CFP && N1CFP)
12808     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
12809 
12810   if (SDValue NewSel = foldBinOpIntoSelect(N))
12811     return NewSel;
12812 
12813   // (fsub A, 0) -> A
12814   if (N1CFP && N1CFP->isZero()) {
12815     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
12816         Flags.hasNoSignedZeros()) {
12817       return N0;
12818     }
12819   }
12820 
12821   if (N0 == N1) {
12822     // (fsub x, x) -> 0.0
12823     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
12824       return DAG.getConstantFP(0.0f, DL, VT);
12825   }
12826 
12827   // (fsub -0.0, N1) -> -N1
12828   // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
12829   //       FSUB does not specify the sign bit of a NaN. Also note that for
12830   //       the same reason, the inverse transform is not safe, unless fast math
12831   //       flags are in play.
12832   if (N0CFP && N0CFP->isZero()) {
12833     if (N0CFP->isNegative() ||
12834         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
12835       if (SDValue NegN1 =
12836               TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
12837         return NegN1;
12838       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12839         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
12840     }
12841   }
12842 
12843   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
12844        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
12845       N1.getOpcode() == ISD::FADD) {
12846     // X - (X + Y) -> -Y
12847     if (N0 == N1->getOperand(0))
12848       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
12849     // X - (Y + X) -> -Y
12850     if (N0 == N1->getOperand(1))
12851       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
12852   }
12853 
12854   // fold (fsub A, (fneg B)) -> (fadd A, B)
12855   if (SDValue NegN1 =
12856           TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
12857     return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1, Flags);
12858 
12859   // FSUB -> FMA combines:
12860   if (SDValue Fused = visitFSUBForFMACombine(N)) {
12861     AddToWorklist(Fused.getNode());
12862     return Fused;
12863   }
12864 
12865   return SDValue();
12866 }
12867 
/// Fold an FMUL node: constant folding and canonicalization, unsafe-math
/// reassociation of nested multiplies, strength reductions (X*2 -> X+X,
/// X*-1 -> -X), double-negation elimination, a select-based fabs pattern,
/// and finally FMUL -> FMA distributive combining.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // Generic FP binop simplifications shared with the other FP visitors.
  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // A*0 is only 0 when A is neither NaN/Inf (NaN*0 = NaN, Inf*0 = NaN) nor
  // negative (sign of zero would flip), hence the nnan+nsz requirement.
  if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
      (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
    // fold (fmul A, 0) -> 0
    if (N1CFP && N1CFP->isZero())
      return N1;
  }

  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
    if (isConstantFPBuildVectorOrConstantFP(N1) &&
        N0.getOpcode() == ISD::FMUL) {
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      // Avoid an infinite loop by making sure that N00 is not a constant
      // (the inner multiply has not been constant folded yet).
      if (isConstantFPBuildVectorOrConstantFP(N01) &&
          !isConstantFPBuildVectorOrConstantFP(N00)) {
        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
        return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
      }
    }

    // Match a special-case: we convert X * 2.0 into fadd.
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
        N0.getOperand(0) == N0.getOperand(1)) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // -N0 * -N1 --> N0 * N1
  // Only profitable when at least one of the negations is cheaper removed.
  TargetLowering::NegatibleCost CostN0 =
      TargetLowering::NegatibleCost::Expensive;
  TargetLowering::NegatibleCost CostN1 =
      TargetLowering::NegatibleCost::Expensive;
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  SDValue NegN1 =
      TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
  if (NegN0 && NegN1 &&
      (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
       CostN1 == TargetLowering::NegatibleCost::Cheaper))
    return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    // Normalize so Select holds the select and X the other multiplicand.
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    // Require a setcc comparing X against the FP constant 0.0.
    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        // Canonicalize "less than" predicates by swapping the select arms so
        // the "greater than" handling below covers both orientations.
        std::swap(TrueOpnd, FalseOpnd);
        LLVM_FALLTHROUGH;
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                   DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
13009 
/// Combine an ISD::FMA node: constant folding, double-negation removal,
/// strength reduction to FADD/FMUL, operand canonicalization, and folding the
/// whole FMA into a cheaper negated form. Returns the replacement value, or an
/// empty SDValue if no combine applies.
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // FMA nodes have flags that propagate to the created nodes.
  const SDNodeFlags Flags = N->getFlags();
  // Value-changing folds below are allowed either by the global unsafe-math
  // option or by contraction being permitted on this particular node.
  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);

  // Constant fold FMA. getNode performs the fold when all three operands are
  // FP constants, so simply rebuilding the node suffices.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  // (-N0 * -N1) + N2 --> (N0 * N1) + N2
  // Only profitable when at least one of the negations is strictly cheaper
  // than the original operand.
  TargetLowering::NegatibleCost CostN0 =
      TargetLowering::NegatibleCost::Expensive;
  TargetLowering::NegatibleCost CostN1 =
      TargetLowering::NegatibleCost::Expensive;
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  SDValue NegN1 =
      TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
  if (NegN0 && NegN1 &&
      (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
       CostN1 == TargetLowering::NegatibleCost::Cheaper))
    return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);

  // (fma 0, x, y) -> y and (fma x, 0, y) -> y. Only valid under unsafe math,
  // since 0 * x may produce NaN or -0.0 that the fold discards.
  if (UnsafeFPMath) {
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // (fma 1, x, y) -> (fadd x, y) and (fma x, 1, y) -> (fadd x, y).
  // TODO: The FMA node should have flags that propagate to these nodes.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y) so constants sit on operand 1.
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  if (UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
                                     Flags), Flags);
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    if (N0.getOpcode() == ISD::FMUL &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      return DAG.getNode(ISD::FMA, DL, VT,
                         N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
                                     Flags),
                         N2);
    }
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }

    // fma (fneg x), K, y -> fma x, -K, y
    // Profitable when FP constants are legal anyway, or when the negated
    // constant replaces an illegal immediate that has no other users.
    if (N0.getOpcode() == ISD::FNEG &&
        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
                                              ForCodeSize)))) {
      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
    }
  }

  if (UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(1.0, DL, VT), Flags),
                         Flags);
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
                         Flags);
    }
  }

  // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
  // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
  // Skipped when FNEG is free, because then the original form costs no more.
  if (!TLI.isFNegFree(VT))
    if (SDValue Neg = TLI.getCheaperNegatedExpression(
            SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
      return DAG.getNode(ISD::FNEG, DL, VT, Neg, Flags);
  return SDValue();
}
13135 
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
// Returns SDValue(N, 0) when N (and its siblings) were replaced in place via
// CombineTo, or an empty SDValue when the transform does not apply.
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // TODO: Limit this transform based on optsize/minsize - it always creates at
  //       least 1 extra instruction. But the perf win may be substantial enough
  //       that only minsize should restrict this.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags Flags = N->getFlags();
  // The fold changes rounding, so it needs global unsafe math or per-node
  // 'arcp'; it is also disabled entirely once the DAG has been legalized.
  if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
    return SDValue();

  // Skip if current node is a reciprocal/fneg-reciprocal: it would become the
  // very reciprocal we are about to create.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
  if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  SDValue N1 = N->getOperand(1);
  unsigned MinUses = TLI.combineRepeatedFPDivisors();

  // For splat vectors, scale the number of uses by the splat factor. If we can
  // convert the division into a scalar op, that will likely be much faster.
  unsigned NumElts = 1;
  EVT VT = N->getValueType(0);
  if (VT.isVector() && DAG.isSplatValue(N1))
    NumElts = VT.getVectorNumElements();

  // MinUses == 0 means the target opted out of this combine.
  if (!MinUses || (N1->use_size() * NumElts) < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if ((Users.size() * NumElts) < MinUses)
    return SDValue();

  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  return SDValue(N, 0);  // N was replaced.
}
13209 
/// Combine an ISD::FDIV node: constant folding, repeated-divisor sharing,
/// reciprocal multiplication, and rsqrt/reciprocal estimate formation.
/// Returns the replacement value, or an empty SDValue if nothing applies.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();

  // Generic FP binop simplifications (undef operands, etc.).
  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2 (getNode constant-folds this).
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Share one reciprocal among multiple divisions by the same divisor.
  if (SDValue V = combineRepeatedFPDivisors(N))
    return V;

  // The remaining folds change rounding behavior, so they require global
  // unsafe math or the per-node 'arcp' flag.
  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // X / sqrt(Z) -> X * rsqrt(Z)
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // X / fpext(sqrt(Z)) -> X * fpext(rsqrt(Z))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // X / fpround(sqrt(Z)) -> X * fpround(rsqrt(Z))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue Sqrt, Y;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        Sqrt = N1.getOperand(0);
        Y = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        Sqrt = N1.getOperand(1);
        Y = N1.getOperand(0);
      }
      if (Sqrt.getNode()) {
        // If the other multiply operand is known positive, pull it into the
        // sqrt. That will eliminate the division if we convert to an estimate:
        // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
        // TODO: Also fold the case where A == Z (fabs is missing).
        if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
            N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse() &&
            Y.getOpcode() == ISD::FABS && Y.hasOneUse()) {
          SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, Y.getOperand(0),
                                   Y.getOperand(0), Flags);
          SDValue AAZ =
              DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0), Flags);
          if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
            return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt, Flags);

          // Estimate creation failed. Clean up speculatively created nodes.
          recursivelyDeleteUnusedNodes(AAZ.getNode());
        }

        // We found a FSQRT, so try to make this fold:
        // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
        if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
          SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y, Flags);
          AddToWorklist(Div.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, Div, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (Options.NoInfsFPMath || Flags.hasNoInfs())
      if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
        return RV;
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  // Only when at least one of the negations is strictly cheaper.
  TargetLowering::NegatibleCost CostN0 =
      TargetLowering::NegatibleCost::Expensive;
  TargetLowering::NegatibleCost CostN1 =
      TargetLowering::NegatibleCost::Expensive;
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  SDValue NegN1 =
      TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
  if (NegN0 && NegN1 &&
      (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
       CostN1 == TargetLowering::NegatibleCost::Cheaper))
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1, Flags);

  return SDValue();
}
13341 
13342 SDValue DAGCombiner::visitFREM(SDNode *N) {
13343   SDValue N0 = N->getOperand(0);
13344   SDValue N1 = N->getOperand(1);
13345   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13346   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13347   EVT VT = N->getValueType(0);
13348   SDNodeFlags Flags = N->getFlags();
13349 
13350   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13351     return R;
13352 
13353   // fold (frem c1, c2) -> fmod(c1,c2)
13354   if (N0CFP && N1CFP)
13355     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
13356 
13357   if (SDValue NewSel = foldBinOpIntoSelect(N))
13358     return NewSel;
13359 
13360   return SDValue();
13361 }
13362 
13363 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
13364   SDNodeFlags Flags = N->getFlags();
13365   const TargetOptions &Options = DAG.getTarget().Options;
13366 
13367   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
13368   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
13369   if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
13370       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
13371     return SDValue();
13372 
13373   SDValue N0 = N->getOperand(0);
13374   if (TLI.isFsqrtCheap(N0, DAG))
13375     return SDValue();
13376 
13377   // FSQRT nodes have flags that propagate to the created nodes.
13378   return buildSqrtEstimate(N0, Flags);
13379 }
13380 
13381 /// copysign(x, fp_extend(y)) -> copysign(x, y)
13382 /// copysign(x, fp_round(y)) -> copysign(x, y)
13383 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
13384   SDValue N1 = N->getOperand(1);
13385   if ((N1.getOpcode() == ISD::FP_EXTEND ||
13386        N1.getOpcode() == ISD::FP_ROUND)) {
13387     // Do not optimize out type conversion of f128 type yet.
13388     // For some targets like x86_64, configuration is changed to keep one f128
13389     // value in one SSE register, but instruction selection cannot handle
13390     // FCOPYSIGN on SSE registers yet.
13391     EVT N1VT = N1->getValueType(0);
13392     EVT N1Op0VT = N1->getOperand(0).getValueType();
13393     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
13394   }
13395   return false;
13396 }
13397 
13398 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
13399   SDValue N0 = N->getOperand(0);
13400   SDValue N1 = N->getOperand(1);
13401   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
13402   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
13403   EVT VT = N->getValueType(0);
13404 
13405   if (N0CFP && N1CFP) // Constant fold
13406     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
13407 
13408   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
13409     const APFloat &V = N1C->getValueAPF();
13410     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
13411     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
13412     if (!V.isNegative()) {
13413       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
13414         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13415     } else {
13416       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13417         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
13418                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
13419     }
13420   }
13421 
13422   // copysign(fabs(x), y) -> copysign(x, y)
13423   // copysign(fneg(x), y) -> copysign(x, y)
13424   // copysign(copysign(x,z), y) -> copysign(x, y)
13425   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
13426       N0.getOpcode() == ISD::FCOPYSIGN)
13427     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
13428 
13429   // copysign(x, abs(y)) -> abs(x)
13430   if (N1.getOpcode() == ISD::FABS)
13431     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13432 
13433   // copysign(x, copysign(y,z)) -> copysign(x, z)
13434   if (N1.getOpcode() == ISD::FCOPYSIGN)
13435     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
13436 
13437   // copysign(x, fp_extend(y)) -> copysign(x, y)
13438   // copysign(x, fp_round(y)) -> copysign(x, y)
13439   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
13440     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
13441 
13442   return SDValue();
13443 }
13444 
/// Combine an ISD::FPOW node with a constant exponent: x**(1/3) becomes a
/// cbrt call, and x**0.25 / x**0.75 become sqrt sequences, each guarded by
/// the fast-math flags needed to tolerate the special-case differences.
SDValue DAGCombiner::visitFPOW(SDNode *N) {
  ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
  if (!ExponentC)
    return SDValue();

  // Try to convert x ** (1/3) into cube root.
  // TODO: Handle the various flavors of long double.
  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
  //       Some range near 1/3 should be fine.
  EVT VT = N->getValueType(0);
  if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
      (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
    // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
    // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf nnan afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();
    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Do not create a cbrt() libcall if the target does not have it, and do not
    // turn a pow that has lowering support into a cbrt() libcall.
    if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
        (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
         DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
      return SDValue();

    return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
  }

  // Try to convert x ** (1/4) and x ** (3/4) into square roots.
  // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
  // TODO: This could be extended (using a target hook) to handle smaller
  // power-of-2 fractional exponents.
  bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
  bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
  if (ExponentIs025 || ExponentIs075) {
    // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
    // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
    // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
    // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();

    // We only need no signed zeros for the 0.25 case.
    if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Don't double the number of libcalls. We are trying to inline fast code.
    if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
      return SDValue();

    // Assume that libcalls are the smallest code.
    // TODO: This restriction should probably be lifted for vectors.
    if (ForCodeSize)
      return SDValue();

    // pow(X, 0.25) --> sqrt(sqrt(X))
    SDLoc DL(N);
    SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
    SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
    if (ExponentIs025)
      return SqrtSqrt;
    // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
    return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
  }

  return SDValue();
}
13520 
13521 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
13522                                const TargetLowering &TLI) {
13523   // This optimization is guarded by a function attribute because it may produce
13524   // unexpected results. Ie, programs may be relying on the platform-specific
13525   // undefined behavior when the float-to-int conversion overflows.
13526   const Function &F = DAG.getMachineFunction().getFunction();
13527   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
13528   if (StrictOverflow.getValueAsString().equals("false"))
13529     return SDValue();
13530 
13531   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
13532   // replacing casts with a libcall. We also must be allowed to ignore -0.0
13533   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
13534   // conversions would return +0.0.
13535   // FIXME: We should be able to use node-level FMF here.
13536   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
13537   EVT VT = N->getValueType(0);
13538   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
13539       !DAG.getTarget().Options.NoSignedZerosFPMath)
13540     return SDValue();
13541 
13542   // fptosi/fptoui round towards zero, so converting from FP to integer and
13543   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
13544   SDValue N0 = N->getOperand(0);
13545   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
13546       N0.getOperand(0).getValueType() == VT)
13547     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
13548 
13549   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
13550       N0.getOperand(0).getValueType() == VT)
13551     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
13552 
13553   return SDValue();
13554 }
13555 
13556 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
13557   SDValue N0 = N->getOperand(0);
13558   EVT VT = N->getValueType(0);
13559   EVT OpVT = N0.getValueType();
13560 
13561   // [us]itofp(undef) = 0, because the result value is bounded.
13562   if (N0.isUndef())
13563     return DAG.getConstantFP(0.0, SDLoc(N), VT);
13564 
13565   // fold (sint_to_fp c1) -> c1fp
13566   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
13567       // ...but only if the target supports immediate floating-point values
13568       (!LegalOperations ||
13569        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
13570     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
13571 
13572   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
13573   // but UINT_TO_FP is legal on this target, try to convert.
13574   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
13575       hasOperation(ISD::UINT_TO_FP, OpVT)) {
13576     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
13577     if (DAG.SignBitIsZero(N0))
13578       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
13579   }
13580 
13581   // The next optimizations are desirable only if SELECT_CC can be lowered.
13582   // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
13583   if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
13584       !VT.isVector() &&
13585       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13586     SDLoc DL(N);
13587     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
13588                          DAG.getConstantFP(0.0, DL, VT));
13589   }
13590 
13591   // fold (sint_to_fp (zext (setcc x, y, cc))) ->
13592   //      (select (setcc x, y, cc), 1.0, 0.0)
13593   if (N0.getOpcode() == ISD::ZERO_EXTEND &&
13594       N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
13595       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13596     SDLoc DL(N);
13597     return DAG.getSelect(DL, VT, N0.getOperand(0),
13598                          DAG.getConstantFP(1.0, DL, VT),
13599                          DAG.getConstantFP(0.0, DL, VT));
13600   }
13601 
13602   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
13603     return FTrunc;
13604 
13605   return SDValue();
13606 }
13607 
13608 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
13609   SDValue N0 = N->getOperand(0);
13610   EVT VT = N->getValueType(0);
13611   EVT OpVT = N0.getValueType();
13612 
13613   // [us]itofp(undef) = 0, because the result value is bounded.
13614   if (N0.isUndef())
13615     return DAG.getConstantFP(0.0, SDLoc(N), VT);
13616 
13617   // fold (uint_to_fp c1) -> c1fp
13618   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
13619       // ...but only if the target supports immediate floating-point values
13620       (!LegalOperations ||
13621        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
13622     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
13623 
13624   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
13625   // but SINT_TO_FP is legal on this target, try to convert.
13626   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
13627       hasOperation(ISD::SINT_TO_FP, OpVT)) {
13628     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
13629     if (DAG.SignBitIsZero(N0))
13630       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
13631   }
13632 
13633   // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
13634   if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
13635       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13636     SDLoc DL(N);
13637     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
13638                          DAG.getConstantFP(0.0, DL, VT));
13639   }
13640 
13641   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
13642     return FTrunc;
13643 
13644   return SDValue();
13645 }
13646 
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
// when the intermediate FP type is wide enough to represent the integer
// round-trip exactly.
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  // Signed types spend one bit on the sign, so their magnitude range is one
  // bit narrower than their width.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range, i.e. the significand covers the
  // effective integer width.
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      // Widen: sign-extend only when both sides are signed, else zero-extend.
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    // Equal widths: reuse the integer value directly.
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}
13688 
13689 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
13690   SDValue N0 = N->getOperand(0);
13691   EVT VT = N->getValueType(0);
13692 
13693   // fold (fp_to_sint undef) -> undef
13694   if (N0.isUndef())
13695     return DAG.getUNDEF(VT);
13696 
13697   // fold (fp_to_sint c1fp) -> c1
13698   if (isConstantFPBuildVectorOrConstantFP(N0))
13699     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
13700 
13701   return FoldIntToFPToInt(N, DAG);
13702 }
13703 
13704 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
13705   SDValue N0 = N->getOperand(0);
13706   EVT VT = N->getValueType(0);
13707 
13708   // fold (fp_to_uint undef) -> undef
13709   if (N0.isUndef())
13710     return DAG.getUNDEF(VT);
13711 
13712   // fold (fp_to_uint c1fp) -> c1
13713   if (isConstantFPBuildVectorOrConstantFP(N0))
13714     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
13715 
13716   return FoldIntToFPToInt(N, DAG);
13717 }
13718 
/// Combine an FP_ROUND node: constant-fold, collapse round-of-extend and
/// round-of-round chains, and sink the rounding below fcopysign.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  // Operand 1 is a constant flag: 1 means the rounding is known to be
  // value-preserving ("trunc"), 0 means it may change the value.
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      // The merged round is "trunc" only if both original rounds were.
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
13774 
/// Combine an FP_EXTEND node: constant-fold, look through fp16 conversions and
/// value-preserving rounds, and fold extensions of loads into extending loads.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Redirect the old load's value users to a value-preserving fp_round of
    // the extending load, and its chain users to the new load's chain.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
13827 
13828 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
13829   SDValue N0 = N->getOperand(0);
13830   EVT VT = N->getValueType(0);
13831 
13832   // fold (fceil c1) -> fceil(c1)
13833   if (isConstantFPBuildVectorOrConstantFP(N0))
13834     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
13835 
13836   return SDValue();
13837 }
13838 
13839 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
13840   SDValue N0 = N->getOperand(0);
13841   EVT VT = N->getValueType(0);
13842 
13843   // fold (ftrunc c1) -> ftrunc(c1)
13844   if (isConstantFPBuildVectorOrConstantFP(N0))
13845     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
13846 
13847   // fold ftrunc (known rounded int x) -> x
13848   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
13849   // likely to be generated to extract integer from a rounded floating value.
13850   switch (N0.getOpcode()) {
13851   default: break;
13852   case ISD::FRINT:
13853   case ISD::FTRUNC:
13854   case ISD::FNEARBYINT:
13855   case ISD::FFLOOR:
13856   case ISD::FCEIL:
13857     return N0;
13858   }
13859 
13860   return SDValue();
13861 }
13862 
13863 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
13864   SDValue N0 = N->getOperand(0);
13865   EVT VT = N->getValueType(0);
13866 
13867   // fold (ffloor c1) -> ffloor(c1)
13868   if (isConstantFPBuildVectorOrConstantFP(N0))
13869     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
13870 
13871   return SDValue();
13872 }
13873 
13874 // FIXME: FNEG and FABS have a lot in common; refactor.
/// Combine an FNEG node: constant-fold, use the target's negated-expression
/// helper, fold -(X-Y), and rewrite fneg-of-bitcast as an integer sign flip.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the target-aware helper can produce an already-negated form of the
  // operand, use it directly.
  if (SDValue NegN0 =
          TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
    return NegN0;

  // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
  // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
  // know it was called from a context with a nsz flag if the input fsub does
  // not.
  if (N0.getOpcode() == ISD::FSUB &&
      (DAG.getTarget().Options.NoSignedZerosFPMath ||
       N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
                       N0.getOperand(0), N->getFlags());
  }

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only fires once the DAG is fully legal, and only when the negated
      // constant (or ConstantFP in general) is legal for the target, so we
      // don't introduce an illegal constant node.
      if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
                       TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
            N0->getFlags());
    }
  }

  return SDValue();
}
13942 
13943 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
13944                             APFloat (*Op)(const APFloat &, const APFloat &)) {
13945   SDValue N0 = N->getOperand(0);
13946   SDValue N1 = N->getOperand(1);
13947   EVT VT = N->getValueType(0);
13948   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
13949   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
13950 
13951   if (N0CFP && N1CFP) {
13952     const APFloat &C0 = N0CFP->getValueAPF();
13953     const APFloat &C1 = N1CFP->getValueAPF();
13954     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
13955   }
13956 
13957   // Canonicalize to constant on RHS.
13958   if (isConstantFPBuildVectorOrConstantFP(N0) &&
13959       !isConstantFPBuildVectorOrConstantFP(N1))
13960     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
13961 
13962   return SDValue();
13963 }
13964 
13965 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
13966   return visitFMinMax(DAG, N, minnum);
13967 }
13968 
13969 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
13970   return visitFMinMax(DAG, N, maxnum);
13971 }
13972 
13973 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
13974   return visitFMinMax(DAG, N, minimum);
13975 }
13976 
13977 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
13978   return visitFMinMax(DAG, N, maximum);
13979 }
13980 
13981 SDValue DAGCombiner::visitFABS(SDNode *N) {
13982   SDValue N0 = N->getOperand(0);
13983   EVT VT = N->getValueType(0);
13984 
13985   // fold (fabs c1) -> fabs(c1)
13986   if (isConstantFPBuildVectorOrConstantFP(N0))
13987     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13988 
13989   // fold (fabs (fabs x)) -> (fabs x)
13990   if (N0.getOpcode() == ISD::FABS)
13991     return N->getOperand(0);
13992 
13993   // fold (fabs (fneg x)) -> (fabs x)
13994   // fold (fabs (fcopysign x, y)) -> (fabs x)
13995   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
13996     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
13997 
13998   // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
13999   if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
14000     SDValue Int = N0.getOperand(0);
14001     EVT IntVT = Int.getValueType();
14002     if (IntVT.isInteger() && !IntVT.isVector()) {
14003       APInt SignMask;
14004       if (N0.getValueType().isVector()) {
14005         // For a vector, get a mask such as 0x7f... per scalar element
14006         // and splat it.
14007         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
14008         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
14009       } else {
14010         // For a scalar, just generate 0x7f...
14011         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
14012       }
14013       SDLoc DL(N0);
14014       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
14015                         DAG.getConstant(SignMask, DL, IntVT));
14016       AddToWorklist(Int.getNode());
14017       return DAG.getBitcast(N->getValueType(0), Int);
14018     }
14019   }
14020 
14021   return SDValue();
14022 }
14023 
/// Combine a BRCOND node: prefer a BR_CC when the condition is a setcc and the
/// target supports it, otherwise try to rebuild the condition as a setcc.
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1); // Branch condition.
  SDValue N2 = N->getOperand(2); // Destination block.

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  if (N1.hasOneUse()) {
    // rebuildSetCC calls visitXor which may change the Chain when there is a
    // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
    HandleSDNode ChainHandle(Chain);
    if (SDValue NewN1 = rebuildSetCC(N1))
      return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
                         ChainHandle.getValue(), NewN1, N2);
  }

  return SDValue();
}
14056 
/// Try to rewrite a boolean value \p N (used as a brcond condition) into an
/// equivalent SETCC node, so the branch can key off a direct comparison.
/// Returns an empty SDValue if no rewrite applies.
SDValue DAGCombiner::rebuildSetCC(SDValue N) {
  if (N.getOpcode() == ISD::SRL ||
      (N.getOpcode() == ISD::TRUNCATE &&
       (N.getOperand(0).hasOneUse() &&
        N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
    if (N.getOpcode() == ISD::TRUNCATE)
      N = N.getOperand(0);

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N.getOperand(0);
    SDValue Op1 = N.getOperand(1);

    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
          SDLoc DL(N);
          return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
                              Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                              ISD::SETNE);
        }
      }
    }
  }

  // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
  // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
  if (N.getOpcode() == ISD::XOR) {
    // Because we may call this on a speculatively constructed
    // SimplifiedSetCC Node, we need to simplify this node first.
    // Ideally this should be folded into SimplifySetCC and not
    // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
    HandleSDNode XORHandle(N);
    while (N.getOpcode() == ISD::XOR) {
      SDValue Tmp = visitXOR(N.getNode());
      // No simplification done.
      if (!Tmp.getNode())
        break;
      // Returning N is a form of in-visit replacement that may have
      // invalidated N. Grab the value from the Handle.
      if (Tmp.getNode() == N.getNode())
        N = XORHandle.getValue();
      else // Node simplified. Try simplifying again.
        N = Tmp;
    }

    // The xor simplified away entirely; return whatever it became.
    if (N.getOpcode() != ISD::XOR)
      return N;

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
      if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
          Op0.getValueType() == MVT::i1) {
        N = Op0;
        Op0 = N->getOperand(0);
        Op1 = N->getOperand(1);
        Equal = true;
      }

      EVT SetCCVT = N.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      // Replace the uses of XOR with SETCC
      return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
                          Equal ? ISD::SETEQ : ISD::SETNE);
    }
  }

  return SDValue();
}
14153 
14154 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
14155 //
14156 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
14157   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
14158   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
14159 
14160   // If N is a constant we could fold this into a fallthrough or unconditional
14161   // branch. However that doesn't happen very often in normal code, because
14162   // Instcombine/SimplifyCFG should have handled the available opportunities.
14163   // If we did this folding here, it would be necessary to update the
14164   // MachineBasicBlock CFG, which is awkward.
14165 
14166   // Use SimplifySetCC to simplify SETCC's.
14167   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
14168                                CondLHS, CondRHS, CC->get(), SDLoc(N),
14169                                false);
14170   if (Simp.getNode()) AddToWorklist(Simp.getNode());
14171 
14172   // fold to a simpler setcc
14173   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
14174     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14175                        N->getOperand(0), Simp.getOperand(2),
14176                        Simp.getOperand(0), Simp.getOperand(1),
14177                        N->getOperand(4));
14178 
14179   return SDValue();
14180 }
14181 
14182 /// Return true if 'Use' is a load or a store that uses N as its base pointer
14183 /// and that N may be folded in the load / store addressing mode.
14184 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
14185                                     SelectionDAG &DAG,
14186                                     const TargetLowering &TLI) {
14187   EVT VT;
14188   unsigned AS;
14189 
14190   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
14191     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
14192       return false;
14193     VT = LD->getMemoryVT();
14194     AS = LD->getAddressSpace();
14195   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
14196     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
14197       return false;
14198     VT = ST->getMemoryVT();
14199     AS = ST->getAddressSpace();
14200   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
14201     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
14202       return false;
14203     VT = LD->getMemoryVT();
14204     AS = LD->getAddressSpace();
14205   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
14206     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
14207       return false;
14208     VT = ST->getMemoryVT();
14209     AS = ST->getAddressSpace();
14210   } else
14211     return false;
14212 
14213   TargetLowering::AddrMode AM;
14214   if (N->getOpcode() == ISD::ADD) {
14215     AM.HasBaseReg = true;
14216     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
14217     if (Offset)
14218       // [reg +/- imm]
14219       AM.BaseOffs = Offset->getSExtValue();
14220     else
14221       // [reg +/- reg]
14222       AM.Scale = 1;
14223   } else if (N->getOpcode() == ISD::SUB) {
14224     AM.HasBaseReg = true;
14225     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
14226     if (Offset)
14227       // [reg +/- imm]
14228       AM.BaseOffs = -Offset->getSExtValue();
14229     else
14230       // [reg +/- reg]
14231       AM.Scale = 1;
14232   } else
14233     return false;
14234 
14235   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
14236                                    VT.getTypeForEVT(*DAG.getContext()), AS);
14237 }
14238 
/// Classify \p N as a (masked) load or store that the target could legally
/// turn into an indexed form using increment mode \p Inc or decrement mode
/// \p Dec. On success, \p Ptr is set to the base pointer, \p IsLoad is
/// cleared for stores and \p IsMasked is set for masked operations; callers
/// are expected to pass in IsLoad == true and IsMasked == false.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
                                     bool &IsLoad, bool &IsMasked, SDValue &Ptr,
                                     const TargetLowering &TLI) {
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // Plain load.
    if (LD->isIndexed())
      return false;
    EVT VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    // Plain store.
    if (ST->isIndexed())
      return false;
    EVT VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
      return false;
    Ptr = ST->getBasePtr();
    IsLoad = false;
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    // Masked load.
    if (LD->isIndexed())
      return false;
    EVT VT = LD->getMemoryVT();
    if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
        !TLI.isIndexedMaskedLoadLegal(Dec, VT))
      return false;
    Ptr = LD->getBasePtr();
    IsMasked = true;
  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
    // Masked store.
    if (ST->isIndexed())
      return false;
    EVT VT = ST->getMemoryVT();
    if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
        !TLI.isIndexedMaskedStoreLegal(Dec, VT))
      return false;
    Ptr = ST->getBasePtr();
    IsLoad = false;
    IsMasked = true;
  } else {
    // Not a memory operation we can index.
    return false;
  }
  return true;
}
14281 
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
/// \returns true if \p N was replaced (and deleted from the DAG).
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Pre-indexed forms are only created once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  bool IsLoad = true;
  bool IsMasked = false;
  SDValue Ptr;
  if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
                                Ptr, TLI))
    return false;

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!IsLoad) {
    SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
                           : cast<StoreSDNode>(N)->getValue();

    // Would require a copy.
    if (Val == BasePtr)
      return false;

    // Would create a cycle.
    if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // A use that is a predecessor of N cannot be rewritten without creating
      // a cycle; just leave it alone.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  // Build the indexed form. For loads the results are (value, new base ptr,
  // chain); for stores they are (new base ptr, chain).
  SDValue Result;
  if (!IsMasked) {
    if (IsLoad)
      Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
    else
      Result =
          DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
  } else {
    if (IsLoad)
      Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
                                        Offset, AM);
    else
      Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
                                         Offset, AM);
  }
  ++PreIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (IsLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}
14505 
/// Decide whether the pointer arithmetic node \p PtrUse (an ADD/SUB user of
/// the base pointer \p Ptr of load/store \p N) can and should be folded into
/// \p N as a post-indexed addressing mode. On success the decomposed parts
/// are returned through \p BasePtr, \p Offset and \p AM.
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
                                   SDValue &BasePtr, SDValue &Offset,
                                   ISD::MemIndexedMode &AM,
                                   SelectionDAG &DAG,
                                   const TargetLowering &TLI) {
  // Only a plain ADD/SUB of the pointer can be folded, and the memory
  // operation itself is not a candidate.
  if (PtrUse == N ||
      (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
    return false;

  // Ask the target to split PtrUse into base + offset for post-indexing.
  if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
    return false;

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Frame indices and registers are rewritten by other passes; don't fold
  // an increment of them here.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Visited is deliberately shared across all iterations so predecessor
  // queries accumulate instead of re-walking the same subgraph.
  SmallPtrSet<const SDNode *, 32> Visited;
  for (SDNode *Use : BasePtr.getNode()->uses()) {
    if (Use == Ptr.getNode())
      continue;

    // Bail if there's a later memory user that could itself be combined to a
    // post-indexed form: folding into N would create a cycle if that user is
    // a successor of N.
    if (isa<MemSDNode>(Use)) {
      bool IsLoad = true;
      bool IsMasked = false;
      SDValue OtherPtr;
      if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
                                   IsMasked, OtherPtr, TLI)) {
        SmallVector<const SDNode *, 2> Worklist;
        Worklist.push_back(Use);
        if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
          return false;
      }
    }

    // If all the uses are load / store addresses, then don't do the
    // transformation.
    if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
      for (SDNode *UseUse : Use->uses())
        if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
          return false;
    }
  }
  return true;
}
14554 
/// Find an ADD/SUB user of \p N's pointer operand that can be folded into
/// \p N as a post-indexed addressing mode. Returns the pointer-arithmetic
/// node to fold, or nullptr if none qualifies. \p IsLoad, \p IsMasked,
/// \p Ptr, \p BasePtr, \p Offset and \p AM are filled in for the caller.
static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
                                         bool &IsMasked, SDValue &Ptr,
                                         SDValue &BasePtr, SDValue &Offset,
                                         ISD::MemIndexedMode &AM,
                                         SelectionDAG &DAG,
                                         const TargetLowering &TLI) {
  // If the pointer has only one use, that use is N itself, so there is no
  // separate add/sub node to fold.
  if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
                                IsMasked, Ptr, TLI) ||
      Ptr.getNode()->hasOneUse())
    return nullptr;

  // Try turning it into a post-indexed load / store except when
  // 1) All uses are load / store ops that use it as base ptr (and
  //    it may be folded as addressing mode).
  // 2) Op must be independent of N, i.e. Op is neither a predecessor
  //    nor a successor of N. Otherwise, if Op is folded that would
  //    create a cycle.
  for (SDNode *Op : Ptr->uses()) {
    // Check for #1.
    if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
      continue;

    // Check for #2.
    SmallPtrSet<const SDNode *, 32> Visited;
    SmallVector<const SDNode *, 8> Worklist;
    // Ptr is predecessor to both N and Op.
    // Seeding Visited with Ptr stops both walks at the shared pointer; the
    // second hasPredecessorHelper call intentionally reuses the visited set
    // accumulated by the first.
    Visited.insert(Ptr.getNode());
    Worklist.push_back(N);
    Worklist.push_back(Op);
    if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
        !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
      return Op;
  }
  return nullptr;
}
14590 
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folds the add/subtract into the
/// new indexed load/store and redirects all uses of the old nodes to the
/// new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Post-indexed forms are only matched after legalization.
  if (Level < AfterLegalizeDAG)
    return false;

  bool IsLoad = true;
  bool IsMasked = false;
  SDValue Ptr;
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
                                         Offset, AM, DAG, TLI);
  if (!Op)
    return false;

  // Build the replacement indexed node. An indexed load produces
  // (value, updated base, chain); an indexed store produces
  // (updated base, chain).
  SDValue Result;
  if (!IsMasked)
    Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
                                         Offset, AM)
                    : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
                                          BasePtr, Offset, AM);
  else
    Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
                                               BasePtr, Offset, AM)
                    : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
                                                BasePtr, Offset, AM);
  ++PostIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
             dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
             dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (IsLoad) {
    // Old load's value -> new value (0); old chain -> new chain (2).
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    // Old store's chain -> new chain (1).
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Replace the uses of the folded add/sub with the updated base value
  // produced by the indexed node (value 1 of a load, value 0 of a store).
  DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                Result.getValue(IsLoad ? 1 : 0));
  deleteAndRecombine(Op);
  return true;
}
14643 
14644 /// Return the base-pointer arithmetic from an indexed \p LD.
14645 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
14646   ISD::MemIndexedMode AM = LD->getAddressingMode();
14647   assert(AM != ISD::UNINDEXED);
14648   SDValue BP = LD->getOperand(1);
14649   SDValue Inc = LD->getOperand(2);
14650 
14651   // Some backends use TargetConstants for load offsets, but don't expect
14652   // TargetConstants in general ADD nodes. We can convert these constants into
14653   // regular Constants (if the constant is not opaque).
14654   assert((Inc.getOpcode() != ISD::TargetConstant ||
14655           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
14656          "Cannot split out indexing using opaque target constants");
14657   if (Inc.getOpcode() == ISD::TargetConstant) {
14658     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
14659     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
14660                           ConstInc->getValueType(0));
14661   }
14662 
14663   unsigned Opc =
14664       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
14665   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
14666 }
14667 
14668 static inline int numVectorEltsOrZero(EVT T) {
14669   return T.isVector() ? T.getVectorNumElements() : 0;
14670 }
14671 
14672 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
14673   Val = ST->getValue();
14674   EVT STType = Val.getValueType();
14675   EVT STMemType = ST->getMemoryVT();
14676   if (STType == STMemType)
14677     return true;
14678   if (isTypeLegal(STMemType))
14679     return false; // fail.
14680   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
14681       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
14682     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
14683     return true;
14684   }
14685   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
14686       STType.isInteger() && STMemType.isInteger()) {
14687     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
14688     return true;
14689   }
14690   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
14691     Val = DAG.getBitcast(STMemType, Val);
14692     return true;
14693   }
14694   return false; // fail.
14695 }
14696 
14697 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
14698   EVT LDMemType = LD->getMemoryVT();
14699   EVT LDType = LD->getValueType(0);
14700   assert(Val.getValueType() == LDMemType &&
14701          "Attempting to extend value of non-matching type");
14702   if (LDType == LDMemType)
14703     return true;
14704   if (LDMemType.isInteger() && LDType.isInteger()) {
14705     switch (LD->getExtensionType()) {
14706     case ISD::NON_EXTLOAD:
14707       Val = DAG.getBitcast(LDType, Val);
14708       return true;
14709     case ISD::EXTLOAD:
14710       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
14711       return true;
14712     case ISD::SEXTLOAD:
14713       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
14714       return true;
14715     case ISD::ZEXTLOAD:
14716       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
14717       return true;
14718     }
14719   }
14720   return false;
14721 }
14722 
/// If \p LD loads from the same address a store on its chain just wrote to,
/// forward the stored value directly to the load's users, modelling any
/// truncation/extension with explicit nodes. Returns the replacement value
/// or an empty SDValue on failure.
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
  if (OptLevel == CodeGenOpt::None || !LD->isSimple())
    return SDValue();
  // Only forward when the store is the load's immediate chain predecessor.
  SDValue Chain = LD->getOperand(0);
  StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
  // TODO: Relax this restriction for unordered atomics (see D66309)
  if (!ST || !ST->isSimple())
    return SDValue();

  EVT LDType = LD->getValueType(0);
  EVT LDMemType = LD->getMemoryVT();
  EVT STMemType = ST->getMemoryVT();
  EVT STType = ST->getValue().getValueType();

  // The two addresses must resolve to the same base with a constant byte
  // Offset of the load relative to the store.
  BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
  BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
  int64_t Offset;
  if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
    return SDValue();

  // Normalize for Endianness. After this Offset=0 will denote that the least
  // significant bit in the loaded value maps to the least significant bit in
  // the stored value). With Offset=n (for n > 0) the loaded value starts at the
  // n:th least significant byte of the stored value.
  if (DAG.getDataLayout().isBigEndian())
    Offset = ((int64_t)STMemType.getStoreSizeInBits() -
              (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset;

  // Check that the stored value covers all bits that are loaded.
  bool STCoversLD =
      (Offset >= 0) &&
      (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());

  // Replace LD with Val/Chain, splitting off the base update of an indexed
  // load into an explicit add/sub first (see SplitIndexingFromLoad).
  auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
    if (LD->isIndexed()) {
      // Cannot handle opaque target constants and we must respect the user's
      // request not to split indexes from loads.
      if (!canSplitIdx(LD))
        return SDValue();
      SDValue Idx = SplitIndexingFromLoad(LD);
      SDValue Ops[] = {Val, Idx, Chain};
      return CombineTo(LD, Ops, 3);
    }
    return CombineTo(LD, Val, Chain);
  };

  if (!STCoversLD)
    return SDValue();

  // Memory as copy space (potentially masked).
  if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
    // Simple case: Direct non-truncating forwarding
    if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
      return ReplaceLd(LD, ST->getValue(), Chain);
    // Can we model the truncate and extension with an and mask?
    if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
        !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
      // Mask to size of LDMemType
      auto Mask =
          DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
                                               STMemType.getSizeInBits()),
                          SDLoc(ST), STType);
      auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
      return ReplaceLd(LD, Val, Chain);
    }
  }

  // TODO: Deal with nonzero offset.
  if (LD->getBasePtr().isUndef() || Offset != 0)
    return SDValue();
  // Model necessary truncations / extensions.
  SDValue Val;
  // Truncate Value To Stored Memory Size.
  // The do/while(false) lets each failing step fall through to the shared
  // dead-node cleanup below.
  do {
    if (!getTruncatedStoreValue(ST, Val))
      continue;
    if (!isTypeLegal(LDMemType))
      continue;
    if (STMemType != LDMemType) {
      // TODO: Support vectors? This requires extract_subvector/bitcast.
      if (!STMemType.isVector() && !LDMemType.isVector() &&
          STMemType.isInteger() && LDMemType.isInteger())
        Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
      else
        continue;
    }
    if (!extendLoadedValueToExtension(LD, Val))
      continue;
    return ReplaceLd(LD, Val, Chain);
  } while (false);

  // On failure, cleanup dead nodes we may have created.
  if (Val->use_empty())
    deleteAndRecombine(Val.getNode());
  return SDValue();
}
14819 
/// Main combine entry point for LOAD nodes: deletes dead loads, forwards
/// stored values, refines alignment, finds better chains, forms indexed
/// loads and slices wide loads.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  // TODO: Allow this for unordered atomics (see D66309)
  if (LD->isSimple()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool CanSplitIdx = canSplitIdx(LD);

      // Dead loaded value: replace value with undef, and the base update
      // either with an explicit add/sub (if still used) or undef.
      if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  if (auto V = ForwardStoreValueToDirectLoad(LD))
    return V;

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
    if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
      if (*Alignment > LD->getAlign() &&
          isAligned(*Alignment, LD->getSrcValueOffset())) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(LD, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
14943 
14944 namespace {
14945 
14946 /// Helper structure used to slice a load in smaller loads.
14947 /// Basically a slice is obtained from the following sequence:
14948 /// Origin = load Ty1, Base
14949 /// Shift = srl Ty1 Origin, CstTy Amount
14950 /// Inst = trunc Shift to Ty2
14951 ///
14952 /// Then, it will be rewritten into:
14953 /// Slice = load SliceTy, Base + SliceOffset
14954 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
14955 ///
14956 /// SliceTy is deduced from the number of bits that are actually used to
14957 /// build Inst.
14958 struct LoadedSlice {
14959   /// Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize = false;

    /// Counts of each kind of operation a (set of) slice(s) requires.
    unsigned Loads = 0;
    unsigned Truncates = 0;
    unsigned CrossRegisterBanksCopies = 0;
    unsigned ZExts = 0;
    unsigned Shift = 0;

    /// Zero cost (used as an accumulator seed).
    explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}

    /// Get the cost of one isolated slice: one load, plus a zero extend
    /// unless the target considers it free.
    Cost(const LoadedSlice &LS, bool ForCodeSize)
        : ForCodeSize(ForCodeSize), Loads(1) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// Account for slicing gain in the current cost.
    /// Slicing provide a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    /// Component-wise accumulation of another cost.
    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    /// Order costs: expensive ops (loads, cross-bank copies) dominate
    /// unless optimizing for size, where every op counts equally.
    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };
15037 
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  SDNode *Inst;

  // The original load instruction.
  LoadSDNode *Origin;

  // The right shift amount in bits from the original load.
  unsigned Shift;

  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  // Default-constructed slices are invalid; isLegal() rejects them.
  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
15055 
15056   /// Get the bits used in a chunk of bits \p BitWidth large.
15057   /// \return Result is \p BitWidth and has used bits set to 1 and
15058   ///         not used bits set to 0.
15059   APInt getUsedBits() const {
15060     // Reproduce the trunc(lshr) sequence:
15061     // - Start from the truncated value.
15062     // - Zero extend to the desired bit width.
15063     // - Shift left.
15064     assert(Origin && "No original load to compare against.");
15065     unsigned BitWidth = Origin->getValueSizeInBits(0);
15066     assert(Inst && "This slice is not bound to an instruction");
15067     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
15068            "Extracted slice is bigger than the whole type!");
15069     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
15070     UsedBits.setAllBits();
15071     UsedBits = UsedBits.zext(BitWidth);
15072     UsedBits <<= Shift;
15073     return UsedBits;
15074   }
15075 
15076   /// Get the size of the slice to be loaded in bytes.
15077   unsigned getLoadedSize() const {
15078     unsigned SliceSize = getUsedBits().countPopulation();
15079     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
15080     return SliceSize / 8;
15081   }
15082 
15083   /// Get the type that will be loaded for this slice.
15084   /// Note: This may not be the final type for the slice.
15085   EVT getLoadedType() const {
15086     assert(DAG && "Missing context");
15087     LLVMContext &Ctxt = *DAG->getContext();
15088     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
15089   }
15090 
15091   /// Get the alignment of the load used for this slice.
15092   Align getAlign() const {
15093     Align Alignment = Origin->getAlign();
15094     uint64_t Offset = getOffsetFromBase();
15095     if (Offset != 0)
15096       Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
15097     return Alignment;
15098   }
15099 
15100   /// Check if this slice can be rewritten with legal operations.
15101   bool isLegal() const {
15102     // An invalid slice is not legal.
15103     if (!Origin || !Inst || !DAG)
15104       return false;
15105 
15106     // Offsets are for indexed load only, we do not handle that.
15107     if (!Origin->getOffset().isUndef())
15108       return false;
15109 
15110     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15111 
15112     // Check that the type is legal.
15113     EVT SliceType = getLoadedType();
15114     if (!TLI.isTypeLegal(SliceType))
15115       return false;
15116 
15117     // Check that the load is legal for this type.
15118     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
15119       return false;
15120 
15121     // Check that the offset can be computed.
15122     // 1. Check its type.
15123     EVT PtrType = Origin->getBasePtr().getValueType();
15124     if (PtrType == MVT::Untyped || PtrType.isExtended())
15125       return false;
15126 
15127     // 2. Check that it fits in the immediate.
15128     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
15129       return false;
15130 
15131     // 3. Check that the computation is legal.
15132     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
15133       return false;
15134 
15135     // Check that the zext is legal if it needs one.
15136     EVT TruncateType = Inst->getValueType(0);
15137     if (TruncateType != SliceType &&
15138         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
15139       return false;
15140 
15141     return true;
15142   }
15143 
15144   /// Get the offset in bytes of this slice in the original chunk of
15145   /// bits.
15146   /// \pre DAG != nullptr.
15147   uint64_t getOffsetFromBase() const {
15148     assert(DAG && "Missing context.");
15149     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
15150     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
15151     uint64_t Offset = Shift / 8;
15152     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
15153     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
15154            "The size of the original loaded type is not a multiple of a"
15155            " byte.");
15156     // If Offset is bigger than TySizeInBytes, it means we are loading all
15157     // zeros. This should have been optimized before in the process.
15158     assert(TySizeInBytes > Offset &&
15159            "Invalid shift amount for given loaded size");
15160     if (IsBigEndian)
15161       Offset = TySizeInBytes - Offset - getLoadedSize();
15162     return Offset;
15163   }
15164 
15165   /// Generate the sequence of instructions to load the slice
15166   /// represented by this object and redirect the uses of this slice to
15167   /// this new sequence of instructions.
15168   /// \pre this->Inst && this->Origin are valid Instructions and this
15169   /// object passed the legal check: LoadedSlice::isLegal returned true.
15170   /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice, reusing the original load's chain,
    // memory flags and (offset-adjusted) pointer info.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
                     Origin->getMemOperand()->getFlags());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }
15202 
15203   /// Check if this slice can be merged with an expensive cross register
15204   /// bank copy. E.g.,
15205   /// i = load i32
15206   /// f = bitcast i32 i to float
15207   bool canMergeExpensiveCrossRegisterBankCopy() const {
15208     if (!Inst || !Inst->hasOneUse())
15209       return false;
15210     SDNode *Use = *Inst->use_begin();
15211     if (Use->getOpcode() != ISD::BITCAST)
15212       return false;
15213     assert(DAG && "Missing context");
15214     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15215     EVT ResVT = Use->getValueType(0);
15216     const TargetRegisterClass *ResRC =
15217         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
15218     const TargetRegisterClass *ArgRC =
15219         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
15220                            Use->getOperand(0)->isDivergent());
15221     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
15222       return false;
15223 
15224     // At this point, we know that we perform a cross-register-bank copy.
15225     // Check if it is expensive.
15226     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
15227     // Assume bitcasts are cheap, unless both register classes do not
15228     // explicitly share a common sub class.
15229     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
15230       return false;
15231 
15232     // Check if it will be merged with the load.
15233     // 1. Check the alignment constraint.
15234     Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
15235         ResVT.getTypeForEVT(*DAG->getContext()));
15236 
15237     if (RequiredAlignment > getAlign())
15238       return false;
15239 
15240     // 2. Check that the load is a legal operation for that type.
15241     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
15242       return false;
15243 
15244     // 3. Check that we do not have a zext in the way.
15245     if (Inst->getValueType(0) != getLoadedType())
15246       return false;
15247 
15248     return true;
15249   }
15250 };
15251 
15252 } // end anonymous namespace
15253 
15254 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
15255 /// \p UsedBits looks like 0..0 1..1 0..0.
15256 static bool areUsedBitsDense(const APInt &UsedBits) {
15257   // If all the bits are one, this is dense!
15258   if (UsedBits.isAllOnesValue())
15259     return true;
15260 
15261   // Get rid of the unused bits on the right.
15262   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
15263   // Get rid of the unused bits on the left.
15264   if (NarrowedUsedBits.countLeadingZeros())
15265     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
15266   // Check that the chunk of bits is completely used.
15267   return NarrowedUsedBits.isAllOnesValue();
15268 }
15269 
15270 /// Check whether or not \p First and \p Second are next to each other
15271 /// in memory. This means that there is no hole between the bits loaded
15272 /// by \p First and the bits loaded by \p Second.
15273 static bool areSlicesNextToEachOther(const LoadedSlice &First,
15274                                      const LoadedSlice &Second) {
15275   assert(First.Origin == Second.Origin && First.Origin &&
15276          "Unable to match different memory origins.");
15277   APInt UsedBits = First.getUsedBits();
15278   assert((UsedBits & Second.getUsedBits()) == 0 &&
15279          "Slices are not supposed to overlap.");
15280   UsedBits |= Second.getUsedBits();
15281   return areUsedBitsDense(UsedBits);
15282 }
15283 
/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  // Note: the loop increment slides the window via First = Second, so a
  // plain 'continue' below keeps the current slice as the first element of
  // the next candidate pair, whereas setting Second to nullptr first
  // abandons it entirely.
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                                                           First = Second) {
    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    Align RequiredAlignment;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (First->getAlign() < RequiredAlignment)
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    // A pair was formed: the two loads fold into one paired load, so the
    // global cost drops by one load.
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}
15343 
15344 /// Check the profitability of all involved LoadedSlice.
15345 /// Currently, it is considered profitable if there is exactly two
15346 /// involved slices (1) which are (2) next to each other in memory, and
15347 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
15348 ///
15349 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
15350 /// the elements themselves.
15351 ///
15352 /// FIXME: When the cost model will be mature enough, we can relax
15353 /// constraints (1) and (2).
15354 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
15355                                 const APInt &UsedBits, bool ForCodeSize) {
15356   unsigned NumberOfSlices = LoadedSlices.size();
15357   if (StressLoadSlicing)
15358     return NumberOfSlices > 1;
15359 
15360   // Check (1).
15361   if (NumberOfSlices != 2)
15362     return false;
15363 
15364   // Check (2).
15365   if (!areUsedBitsDense(UsedBits))
15366     return false;
15367 
15368   // Check (3).
15369   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
15370   // The original code has one big load.
15371   OrigCost.Loads = 1;
15372   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
15373     const LoadedSlice &LS = LoadedSlices[CurrSlice];
15374     // Accumulate the cost of all the slices.
15375     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
15376     GlobalSlicingCost += SliceCost;
15377 
15378     // Account as cost in the original configuration the gain obtained
15379     // with the current slices.
15380     OrigCost.addSliceGain(LS);
15381   }
15382 
15383   // If the target supports paired load, adjust the cost accordingly.
15384   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
15385   return OrigCost > GlobalSlicingCost;
15386 }
15387 
/// If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
bool DAGCombiner::SliceUpLoad(SDNode *N) {
  // Only run after legalization: slicing would otherwise fight with the
  // legalizer's own load expansion.
  if (Level < AfterLegalizeDAG)
    return false;

  LoadSDNode *LD = cast<LoadSDNode>(N);
  // Only simple (non-atomic, non-volatile), normal, integer loads qualify.
  if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
      !LD->getValueType(0).isInteger())
    return false;

  // The algorithm to split up a load of a scalable vector into individual
  // elements currently requires knowing the length of the loaded type,
  // so will need adjusting to work on scalable vectors.
  if (LD->getValueType(0).isScalableVector())
    return false;

  // Keep track of already used bits to detect overlapping values.
  // In that case, we will just abort the transformation.
  APInt UsedBits(LD->getValueSizeInBits(0), 0);

  SmallVector<LoadedSlice, 4> LoadedSlices;

  // Check if this load is used as several smaller chunks of bits.
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  // of computation for each trunc.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;

    SDNode *User = *UI;
    unsigned Shift = 0;

    // Check if this is a trunc(lshr).
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
        isa<ConstantSDNode>(User->getOperand(1))) {
      Shift = User->getConstantOperandVal(1);
      // Look through the shift to the (expected) truncate user.
      User = *User->use_begin();
    }

    // At this point, User is a Truncate, iff we encountered, trunc or
    // trunc(lshr).
    if (User->getOpcode() != ISD::TRUNCATE)
      return false;

    // The width of the type must be a power of 2 and greater than 8-bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted with a non-8-bits multiple, the slice
    // will be across several bytes. We do not support that.
    unsigned Width = User->getValueSizeInBits(0);
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return false;

    // Build the slice for this chain of computations.
    LoadedSlice LS(User, LD, Shift, &DAG);
    APInt CurrentUsedBits = LS.getUsedBits();

    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return false;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;

    // Check if the new slice would be legal.
    if (!LS.isLegal())
      return false;

    // Record the slice.
    LoadedSlices.push_back(LS);
  }

  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
    return false;

  ++SlicedLoads;

  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.

  // Prepare the argument for the new token factor for all the slices.
  SmallVector<SDValue, 8> ArgChains;
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    // Replace the sliced instruction (trunc or trunc(lshr)) by the new load.
    CombineTo(LSIt->Inst, SliceInst, true);
    // loadSlice may have wrapped the load in a zero extend; peel it off to
    // reach the load whose chain result we need.
    if (SliceInst.getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }

  // Tie all the new load chains together and redirect users of the original
  // load's chain result to the token factor.
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  AddToWorklist(Chain.getNode());
  return true;
}
15495 
/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out.  If so, return the byte size being masked out
/// and the shift amount.
/// \return {MaskedBytes, ByteShift} on success, {0, 0} on failure.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  // {0, 0} signals "no match" to the caller.
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  // Width of the masked-out region in whole bytes.
  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  // For narrowing to be valid, it must be the case that the load the
  // immediately preceding memory operation before the store.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() == ISD::TokenFactor &&
           SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use so they are no indirect dependencies.
    if (!LD->isOperandOf(Chain.getNode()))
      return Result;
  } else
    return Result; // Fail.

  // Success: report the masked width in bytes and its byte offset.
  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}
15565 
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
/// \param MaskInfo {number of masked bytes, byte shift} as returned by
///        CheckForMaskedLoad.
/// \return the new narrow store, or an empty SDValue on failure.
static SDValue
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization (and the target doesn't explicitly think this is a bad idea).
  MVT VT = MVT::getIntegerVT(NumBytes * 8);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!DC->isTypeLegal(VT))
    return SDValue();
  if (St->getMemOperand() &&
      !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                              *St->getMemOperand()))
    return SDValue();

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On little endian the masked bytes sit ByteShift bytes from the base; on
  // big endian they sit that far from the end of the value.
  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL);
    // The offset access may be less aligned than the original store.
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset), NewAlign);
}
15628 
15629 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
15630 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
15631 /// narrowing the load and store if it would end up being a win for performance
15632 /// or code size.
15633 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
15634   StoreSDNode *ST  = cast<StoreSDNode>(N);
15635   if (!ST->isSimple())
15636     return SDValue();
15637 
15638   SDValue Chain = ST->getChain();
15639   SDValue Value = ST->getValue();
15640   SDValue Ptr   = ST->getBasePtr();
15641   EVT VT = Value.getValueType();
15642 
15643   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
15644     return SDValue();
15645 
15646   unsigned Opc = Value.getOpcode();
15647 
15648   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
15649   // is a byte mask indicating a consecutive number of bytes, check to see if
15650   // Y is known to provide just those bytes.  If so, we try to replace the
15651   // load + replace + store sequence with a single (narrower) store, which makes
15652   // the load dead.
15653   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
15654     std::pair<unsigned, unsigned> MaskedLoad;
15655     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
15656     if (MaskedLoad.first)
15657       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
15658                                                   Value.getOperand(1), ST,this))
15659         return NewST;
15660 
15661     // Or is commutative, so try swapping X and Y.
15662     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
15663     if (MaskedLoad.first)
15664       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
15665                                                   Value.getOperand(0), ST,this))
15666         return NewST;
15667   }
15668 
15669   if (!EnableReduceLoadOpStoreWidth)
15670     return SDValue();
15671 
15672   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
15673       Value.getOperand(1).getOpcode() != ISD::Constant)
15674     return SDValue();
15675 
15676   SDValue N0 = Value.getOperand(0);
15677   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15678       Chain == SDValue(N0.getNode(), 1)) {
15679     LoadSDNode *LD = cast<LoadSDNode>(N0);
15680     if (LD->getBasePtr() != Ptr ||
15681         LD->getPointerInfo().getAddrSpace() !=
15682         ST->getPointerInfo().getAddrSpace())
15683       return SDValue();
15684 
15685     // Find the type to narrow it the load / op / store to.
15686     SDValue N1 = Value.getOperand(1);
15687     unsigned BitWidth = N1.getValueSizeInBits();
15688     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
15689     if (Opc == ISD::AND)
15690       Imm ^= APInt::getAllOnesValue(BitWidth);
15691     if (Imm == 0 || Imm.isAllOnesValue())
15692       return SDValue();
15693     unsigned ShAmt = Imm.countTrailingZeros();
15694     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
15695     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
15696     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
15697     // The narrowing should be profitable, the load/store operation should be
15698     // legal (or custom) and the store size should be equal to the NewVT width.
15699     while (NewBW < BitWidth &&
15700            (NewVT.getStoreSizeInBits() != NewBW ||
15701             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
15702             !TLI.isNarrowingProfitable(VT, NewVT))) {
15703       NewBW = NextPowerOf2(NewBW);
15704       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
15705     }
15706     if (NewBW >= BitWidth)
15707       return SDValue();
15708 
15709     // If the lsb changed does not start at the type bitwidth boundary,
15710     // start at the previous one.
15711     if (ShAmt % NewBW)
15712       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
15713     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
15714                                    std::min(BitWidth, ShAmt + NewBW));
15715     if ((Imm & Mask) == Imm) {
15716       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
15717       if (Opc == ISD::AND)
15718         NewImm ^= APInt::getAllOnesValue(NewBW);
15719       uint64_t PtrOff = ShAmt / 8;
15720       // For big endian targets, we need to adjust the offset to the pointer to
15721       // load the correct bytes.
15722       if (DAG.getDataLayout().isBigEndian())
15723         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
15724 
15725       Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
15726       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
15727       if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
15728         return SDValue();
15729 
15730       SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD));
15731       SDValue NewLD =
15732           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
15733                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
15734                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
15735       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
15736                                    DAG.getConstant(NewImm, SDLoc(Value),
15737                                                    NewVT));
15738       SDValue NewST =
15739           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
15740                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
15741 
15742       AddToWorklist(NewPtr.getNode());
15743       AddToWorklist(NewLD.getNode());
15744       AddToWorklist(NewVal.getNode());
15745       WorklistRemover DeadNodes(*this);
15746       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
15747       ++OpsNarrowed;
15748       return NewST;
15749     }
15750   }
15751 
15752   return SDValue();
15753 }
15754 
15755 /// For a given floating point load / store pair, if the load value isn't used
15756 /// by any other operations, then consider transforming the pair to integer
15757 /// load / store operations if the target deems the transformation profitable.
15758 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
15759   StoreSDNode *ST  = cast<StoreSDNode>(N);
15760   SDValue Value = ST->getValue();
15761   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
15762       Value.hasOneUse()) {
15763     LoadSDNode *LD = cast<LoadSDNode>(Value);
15764     EVT VT = LD->getMemoryVT();
15765     if (!VT.isFloatingPoint() ||
15766         VT != ST->getMemoryVT() ||
15767         LD->isNonTemporal() ||
15768         ST->isNonTemporal() ||
15769         LD->getPointerInfo().getAddrSpace() != 0 ||
15770         ST->getPointerInfo().getAddrSpace() != 0)
15771       return SDValue();
15772 
15773     TypeSize VTSize = VT.getSizeInBits();
15774 
15775     // We don't know the size of scalable types at compile time so we cannot
15776     // create an integer of the equivalent size.
15777     if (VTSize.isScalable())
15778       return SDValue();
15779 
15780     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
15781     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
15782         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
15783         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
15784         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
15785       return SDValue();
15786 
15787     Align LDAlign = LD->getAlign();
15788     Align STAlign = ST->getAlign();
15789     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
15790     Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
15791     if (LDAlign < ABIAlign || STAlign < ABIAlign)
15792       return SDValue();
15793 
15794     SDValue NewLD =
15795         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
15796                     LD->getPointerInfo(), LDAlign);
15797 
15798     SDValue NewST =
15799         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
15800                      ST->getPointerInfo(), STAlign);
15801 
15802     AddToWorklist(NewLD.getNode());
15803     AddToWorklist(NewST.getNode());
15804     WorklistRemover DeadNodes(*this);
15805     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
15806     ++LdStFP2Int;
15807     return NewST;
15808   }
15809 
15810   return SDValue();
15811 }
15812 
15813 // This is a helper function for visitMUL to check the profitability
15814 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
15815 // MulNode is the original multiply, AddNode is (add x, c1),
15816 // and ConstNode is c2.
15817 //
15818 // If the (add x, c1) has multiple uses, we could increase
15819 // the number of adds if we make this transformation.
15820 // It would only be worth doing this if we can remove a
15821 // multiply in the process. Check for that here.
15822 // To illustrate:
15823 //     (A + c1) * c3
15824 //     (A + c2) * c3
15825 // We're checking for cases where we have common "c3 * A" expressions.
15826 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
15827                                               SDValue &AddNode,
15828                                               SDValue &ConstNode) {
15829   APInt Val;
15830 
15831   // If the add only has one use, this would be OK to do.
15832   if (AddNode.getNode()->hasOneUse())
15833     return true;
15834 
15835   // Walk all the users of the constant with which we're multiplying.
15836   for (SDNode *Use : ConstNode->uses()) {
15837     if (Use == MulNode) // This use is the one we're on right now. Skip it.
15838       continue;
15839 
15840     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
15841       SDNode *OtherOp;
15842       SDNode *MulVar = AddNode.getOperand(0).getNode();
15843 
15844       // OtherOp is what we're multiplying against the constant.
15845       if (Use->getOperand(0) == ConstNode)
15846         OtherOp = Use->getOperand(1).getNode();
15847       else
15848         OtherOp = Use->getOperand(0).getNode();
15849 
15850       // Check to see if multiply is with the same operand of our "add".
15851       //
15852       //     ConstNode  = CONST
15853       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
15854       //     ...
15855       //     AddNode  = (A + c1)  <-- MulVar is A.
15856       //         = AddNode * ConstNode   <-- current visiting instruction.
15857       //
15858       // If we make this transformation, we will have a common
15859       // multiply (ConstNode * A) that we can save.
15860       if (OtherOp == MulVar)
15861         return true;
15862 
15863       // Now check to see if a future expansion will give us a common
15864       // multiply.
15865       //
15866       //     ConstNode  = CONST
15867       //     AddNode    = (A + c1)
15868       //     ...   = AddNode * ConstNode <-- current visiting instruction.
15869       //     ...
15870       //     OtherOp = (A + c2)
15871       //     Use     = OtherOp * ConstNode <-- visiting Use.
15872       //
15873       // If we make this transformation, we will have a common
15874       // multiply (CONST * A) after we also do the same transformation
15875       // to the "t2" instruction.
15876       if (OtherOp->getOpcode() == ISD::ADD &&
15877           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
15878           OtherOp->getOperand(0).getNode() == MulVar)
15879         return true;
15880     }
15881   }
15882 
15883   // Didn't find a case where this would be profitable.
15884   return false;
15885 }
15886 
15887 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
15888                                          unsigned NumStores) {
15889   SmallVector<SDValue, 8> Chains;
15890   SmallPtrSet<const SDNode *, 8> Visited;
15891   SDLoc StoreDL(StoreNodes[0].MemNode);
15892 
15893   for (unsigned i = 0; i < NumStores; ++i) {
15894     Visited.insert(StoreNodes[i].MemNode);
15895   }
15896 
15897   // don't include nodes that are children or repeated nodes.
15898   for (unsigned i = 0; i < NumStores; ++i) {
15899     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
15900       Chains.push_back(StoreNodes[i].MemNode->getChain());
15901   }
15902 
15903   assert(Chains.size() > 0 && "Chain should have generated a chain");
15904   return DAG.getTokenFactor(StoreDL, Chains);
15905 }
15906 
// Merge the first NumStores stores in StoreNodes (each of memory type MemVT)
// into a single wide store. The stored values are either all constants
// (IsConstantSrc) or all extracted vector elements. If UseVector is set, the
// merged value is assembled as a vector (BUILD_VECTOR / CONCAT_VECTORS);
// otherwise the constants are packed into a single integer. UseTrunc emits
// the result as a truncating store of the promoted integer type. Returns
// true if the merged store was created (the old stores are replaced via
// CombineTo), false if the merge had to be abandoned.
bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  // Use the location of the first store for every node created below.
  SDLoc DL(StoreNodes[0].MemNode);

  TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
  unsigned SizeInBits = NumStores * ElementSizeBits;
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  // Compute the type of the merged store: a wider vector when UseVector,
  // otherwise an integer covering all NumStores elements.
  EVT StoreTy;
  if (UseVector) {
    unsigned Elts = NumStores * NumMemElts;
    // Get the type for the merged vector store.
    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
  } else
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);

  SDValue StoredVal;
  if (UseVector) {
    if (IsConstantSrc) {
      // Gather the constant operands into a BUILD_VECTOR / CONCAT_VECTORS,
      // converting each constant to MemVT where needed.
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0; I != NumStores; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // If constant is of the wrong type, convert it now.
        if (MemVT != Val.getValueType()) {
          Val = peekThroughBitcasts(Val);
          // Deal with constants of wrong size.
          if (ElementSizeBits != Val.getValueSizeInBits()) {
            EVT IntMemVT =
                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
            if (isa<ConstantFPSDNode>(Val)) {
              // Not clear how to truncate FP values.
              return false;
            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
              // Re-size the integer constant to the element's store width.
              Val = DAG.getConstant(C->getAPIntValue()
                                        .zextOrTrunc(Val.getValueSizeInBits())
                                        .zextOrTrunc(ElementSizeBits),
                                    SDLoc(C), IntMemVT);
          }
          // Bitcast to the element memory type expected by the build vector.
          Val = DAG.getBitcast(MemVT, Val);
        }
        BuildVector.push_back(Val);
      }
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, BuildVector);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = peekThroughBitcasts(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
        // type MemVT. If the underlying value is not the correct
        // type, but it is an extraction of an appropriate vector we
        // can recast Val to be of the correct type. This may require
        // converting between EXTRACT_VECTOR_ELT and
        // EXTRACT_SUBVECTOR.
        if ((MemVT != Val.getValueType()) &&
            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
          EVT MemVTScalarTy = MemVT.getScalarType();
          // We may need to add a bitcast here to get types to line up.
          if (MemVTScalarTy != Val.getValueType().getScalarType()) {
            Val = DAG.getBitcast(MemVT, Val);
          } else {
            // Re-issue the extract with the result type MemVT expects.
            unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
                                            : ISD::EXTRACT_VECTOR_ELT;
            SDValue Vec = Val.getOperand(0);
            SDValue Idx = Val.getOperand(1);
            Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
          }
        }
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      // On little-endian targets visit the stores in reverse order so that,
      // after the shifts below, the first store's value lands in the least
      // significant bits of the combined integer.
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      Val = peekThroughBitcasts(Val);
      StoreInt <<= ElementSizeBits;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF()
                        .bitcastToAPInt()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary give up for now.
        if (MemVT.getSizeInBits() != ElementSizeBits)
          return false;
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // make sure we use trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (!UseTrunc) {
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
  } else { // Must be realized as a trunc store
    // Widen the constant to the promoted type and store it back with the
    // original (narrower) memory type.
    EVT LegalizedStoredValTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlignment(),
        FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}
16063 
// Collect into StoreNodes all stores that are candidates to be merged with
// St: simple, non-indexed stores whose address shares St's base/index (the
// byte offset from that base is recorded in each MemOpLink) and whose stored
// value has the same source kind as St's (constant, extracted vector
// element, or load). RootNode is set to the common chain ancestor that was
// searched for candidates.
void DAGCombiner::getStoreMergeCandidates(
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SDNode *&RootNode) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
  EVT MemVT = St->getMemoryVT();

  SDValue Val = peekThroughBitcasts(St->getValue());
  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return;

  StoreSource StoreSrc = getStoreSource(Val);
  assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
  BaseIndexOffset LBasePtr;
  // Match on loadbaseptr if relevant.
  EVT LoadVT;
  if (StoreSrc == StoreSource::Load) {
    auto *Ld = cast<LoadSDNode>(Val);
    LBasePtr = BaseIndexOffset::match(Ld, DAG);
    LoadVT = Ld->getMemoryVT();
    // Load and store should be the same type.
    if (MemVT != LoadVT)
      return;
    // Loads must only have one use.
    if (!Ld->hasNUsesOfValue(1, 0))
      return;
    // The memory operands must not be volatile/indexed/atomic.
    // TODO: May be able to relax for unordered atomics (see D66309)
    if (!Ld->isSimple() || Ld->isIndexed())
      return;
  }
  // Returns true if Other is a viable merge candidate. On success, Ptr holds
  // Other's base-index decomposition and Offset its byte distance from
  // BasePtr.
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    // The memory operands must not be volatile/indexed/atomic.
    // TODO: May be able to relax for unordered atomics (see D66309)
    if (!Other->isSimple() ||  Other->isIndexed())
      return false;
    // Don't mix temporal stores with non-temporal stores.
    if (St->isNonTemporal() != Other->isNonTemporal())
      return false;
    SDValue OtherBC = peekThroughBitcasts(Other->getValue());
    // Allow merging constants of different types as integers.
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                           : Other->getMemoryVT() != MemVT;
    if (StoreSrc == StoreSource::Load) {
      if (NoTypeMatch)
        return false;
      // The Load's Base Ptr must also match
      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
        BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
        if (LoadVT != OtherLd->getMemoryVT())
          return false;
        // Loads must only have one use.
        if (!OtherLd->hasNUsesOfValue(1, 0))
          return false;
        // The memory operands must not be volatile/indexed/atomic.
        // TODO: May be able to relax for unordered atomics (see D66309)
        if (!OtherLd->isSimple() ||
            OtherLd->isIndexed())
          return false;
        // Don't mix temporal loads with non-temporal loads.
        if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
          return false;
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
          return false;
      } else
        return false;
    }
    if (StoreSrc == StoreSource::Constant) {
      if (NoTypeMatch)
        return false;
      if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
        return false;
    }
    if (StoreSrc == StoreSource::Extract) {
      // Do not merge truncated stores here.
      if (Other->isTruncatingStore())
        return false;
      if (!MemVT.bitsEq(OtherBC.getValueType()))
        return false;
      if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
          OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
        return false;
    }
    Ptr = BaseIndexOffset::match(Other, DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };

  // Check if the pair of StoreNode and the RootNode already bail out many
  // times which is over the limit in dependence check.
  auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
                                        SDNode *RootNode) -> bool {
    auto RootCount = StoreRootCountMap.find(StoreNode);
    if (RootCount != StoreRootCountMap.end() &&
        RootCount->second.first == RootNode &&
        RootCount->second.second > StoreMergeDependenceLimit)
      return true;
    return false;
  };

  // We are looking for a root node which is an ancestor to all mergeable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load
  // which is always true for nonvolatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
  //
  // Root
  // |-------|-------|
  // Load    Load    Store3
  // |       |
  // Store1   Store2
  //
  // FIXME: We should be able to climb and
  // descend TokenFactors to find candidates as well.

  RootNode = St->getChain().getNode();

  // Bound the number of chain users inspected to keep compile time in check.
  unsigned NumNodesExplored = 0;
  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
    // Climb up through the load to its chain, then walk back down through
    // sibling loads to find stores hanging off them.
    RootNode = Ldn->getChain().getNode();
    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
         I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
          if (I2.getOperandNo() == 0)
            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
              BaseIndexOffset Ptr;
              int64_t PtrDiff;
              if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
                  !OverLimitInDependenceCheck(OtherST, RootNode))
                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
            }
  } else
    // The root is not a load: consider every store chained directly to it.
    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
         I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
      if (I.getOperandNo() == 0)
        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
          BaseIndexOffset Ptr;
          int64_t PtrDiff;
          if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
              !OverLimitInDependenceCheck(OtherST, RootNode))
            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
        }
}
16215 
// We need to check that merging these stores does not cause a loop in
// the DAG. Any store candidate may depend on another candidate
// indirectly through its operand (we already consider dependencies
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates. Returns true if the first NumStores
// entries of StoreNodes can be merged safely, false if a cycle is possible.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
    SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs on the originating
  // stores from which worklist nodes come from in a similar way to
  // TokenFactor simplification.

  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 8> Worklist;

  // RootNode is a predecessor to all candidates so we need not search
  // past it. Add RootNode (peeking through TokenFactors). Do not count
  // these towards size check.

  Worklist.push_back(RootNode);
  while (!Worklist.empty()) {
    auto N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue; // Already present in Visited.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (SDValue Op : N->ops())
        Worklist.push_back(Op.getNode());
    }
  }

  // Don't count pruning nodes towards max.
  unsigned int Max = 1024 + Visited.size();
  // Search Ops of store candidates.
  for (unsigned i = 0; i < NumStores; ++i) {
    SDNode *N = StoreNodes[i].MemNode;
    // Of the 4 Store Operands:
    //   * Chain (Op 0) -> We have already considered these
    //                    in candidate selection and can be
    //                    safely ignored
    //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
    //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                       but aren't necessarily from the same base node, so
    //                       cycles possible (e.g. via indexed store).
    //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
    //               non-indexed stores). Not constant on all targets (e.g. ARM)
    //               and so can participate in a cycle.
    for (unsigned j = 1; j < N->getNumOperands(); ++j)
      Worklist.push_back(N->getOperand(j).getNode());
  }
  // Search through DAG. We can stop early if we find a store node.
  // hasPredecessorHelper does a bounded search from the worklist nodes and
  // returns true if it reaches this candidate, i.e. merging could create a
  // cycle; Visited/Worklist accumulate state across iterations.
  for (unsigned i = 0; i < NumStores; ++i)
    if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
                                     Max)) {
      // If the searching bail out, record the StoreNode and RootNode in the
      // StoreRootCountMap. If we have seen the pair many times over a limit,
      // we won't add the StoreNode into StoreNodes set again.
      if (Visited.size() >= Max) {
        auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
        if (RootCount.first == RootNode)
          RootCount.second++;
        else
          RootCount = {RootNode, 1};
      }
      return false;
    }
  return true;
}
16284 
16285 unsigned
16286 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
16287                                   int64_t ElementSizeBytes) const {
16288   while (true) {
16289     // Find a store past the width of the first store.
16290     size_t StartIdx = 0;
16291     while ((StartIdx + 1 < StoreNodes.size()) &&
16292            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
16293               StoreNodes[StartIdx + 1].OffsetFromBase)
16294       ++StartIdx;
16295 
16296     // Bail if we don't have enough candidates to merge.
16297     if (StartIdx + 1 >= StoreNodes.size())
16298       return 0;
16299 
16300     // Trim stores that overlapped with the first store.
16301     if (StartIdx)
16302       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
16303 
16304     // Scan the memory operations on the chain and find the first
16305     // non-consecutive store memory address.
16306     unsigned NumConsecutiveStores = 1;
16307     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
16308     // Check that the addresses are consecutive starting from the second
16309     // element in the list of stores.
16310     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
16311       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
16312       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
16313         break;
16314       NumConsecutiveStores = i + 1;
16315     }
16316     if (NumConsecutiveStores > 1)
16317       return NumConsecutiveStores;
16318 
16319     // There are no consecutive stores at the start of the list.
16320     // Remove the first store and try again.
16321     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
16322   }
16323 }
16324 
// Try to merge runs within the first NumConsecutiveStores entries of
// StoreNodes, where every stored value is a constant, into wider integer or
// vector stores. Vector stores are used only when AllowVectors (and the
// target says a vector constant store is cheap, or all elements are zero).
// Returns true if any merge was performed.
bool DAGCombiner::tryStoreMergeOfConstants(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
    EVT MemVT, SDNode *RootNode, bool AllowVectors) {
  LLVMContext &Context = *DAG.getContext();
  const DataLayout &DL = DAG.getDataLayout();
  int64_t ElementSizeBytes = MemVT.getStoreSize();
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
  bool MadeChange = false;

  // Store the constants into memory as one consecutive store.
  while (NumConsecutiveStores >= 2) {
    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
    unsigned FirstStoreAlign = FirstInChain->getAlignment();
    // Largest legal merged sizes found so far (in number of stores).
    unsigned LastLegalType = 1;
    unsigned LastLegalVectorType = 1;
    bool LastIntegerTrunc = false;
    bool NonZero = false;
    // Index of the first zero element following a nonzero one; used below to
    // limit how many candidates we skip when no merge is found, since a
    // later all-zero run may still merge as a vector.
    unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
    for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
      StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
      SDValue StoredVal = ST->getValue();
      bool IsElementZero = false;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
        IsElementZero = C->isNullValue();
      else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
        IsElementZero = C->getConstantFPValue()->isNullValue();
      if (IsElementZero) {
        if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
          FirstZeroAfterNonZero = i;
      }
      NonZero |= !IsElementZero;

      // Find a legal type for the constant store.
      unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
      EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
      bool IsFast = false;

      // Break early when size is too large to be legal.
      if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
        break;

      if (TLI.isTypeLegal(StoreTy) &&
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                 *FirstInChain->getMemOperand(), &IsFast) &&
          IsFast) {
        LastIntegerTrunc = false;
        LastLegalType = i + 1;
        // Or check whether a truncstore is legal.
      } else if (TLI.getTypeAction(Context, StoreTy) ==
                 TargetLowering::TypePromoteInteger) {
        EVT LegalizedStoredValTy =
            TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
        if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                   *FirstInChain->getMemOperand(), &IsFast) &&
            IsFast) {
          LastIntegerTrunc = true;
          LastLegalType = i + 1;
        }
      }

      // We only use vectors if the constant is known to be zero or the
      // target allows it and the function is not marked with the
      // noimplicitfloat attribute.
      if ((!NonZero ||
           TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
          AllowVectors) {
        // Find a legal type for the vector store.
        unsigned Elts = (i + 1) * NumMemElts;
        EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
        if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
            TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
            TLI.allowsMemoryAccess(Context, DL, Ty,
                                   *FirstInChain->getMemOperand(), &IsFast) &&
            IsFast)
          LastLegalVectorType = i + 1;
      }
    }

    // Prefer the vector form when it covers more stores than the integer one.
    bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
    unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;

    // Check if we found a legal integer type that creates a meaningful
    // merge.
    if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have, is if the alignment has
      // improved or we've dropped a non-zero value. Drop as many
      // candidates as we can here.
      unsigned NumSkip = 1;
      while ((NumSkip < NumConsecutiveStores) &&
             (NumSkip < FirstZeroAfterNonZero) &&
             (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
        NumSkip++;

      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
      NumConsecutiveStores -= NumSkip;
      continue;
    }

    // Check that we can merge these candidates without causing a cycle.
    if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
                                                  RootNode)) {
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
      NumConsecutiveStores -= NumElem;
      continue;
    }

    MadeChange |= mergeStoresOfConstantsOrVecElts(
        StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);

    // Remove merged stores for next iteration.
    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
    NumConsecutiveStores -= NumElem;
  }
  return MadeChange;
}
16448 
// Try to merge runs within the first NumConsecutiveStores entries of
// StoreNodes, where every stored value is an extracted vector element or
// subvector, into wider vector stores. Returns true if any merge was
// performed.
bool DAGCombiner::tryStoreMergeOfExtracts(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
    EVT MemVT, SDNode *RootNode) {
  LLVMContext &Context = *DAG.getContext();
  const DataLayout &DL = DAG.getDataLayout();
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
  bool MadeChange = false;

  // Loop on Consecutive Stores on success.
  while (NumConsecutiveStores >= 2) {
    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
    unsigned FirstStoreAlign = FirstInChain->getAlignment();
    // Widest legal merge found so far (in number of stores).
    unsigned NumStoresToMerge = 1;
    for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
      // Find a legal type for the vector store.
      unsigned Elts = (i + 1) * NumMemElts;
      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
      bool IsFast = false;

      // Break early when size is too large to be legal.
      if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
        break;

      if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
          TLI.allowsMemoryAccess(Context, DL, Ty,
                                 *FirstInChain->getMemOperand(), &IsFast) &&
          IsFast)
        NumStoresToMerge = i + 1;
    }

    // Check if we found a legal integer type creating a meaningful
    // merge.
    if (NumStoresToMerge < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have, is if the alignment has
      // improved. Drop as many candidates as we can here.
      unsigned NumSkip = 1;
      while ((NumSkip < NumConsecutiveStores) &&
             (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
        NumSkip++;

      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
      NumConsecutiveStores -= NumSkip;
      continue;
    }

    // Check that we can merge these candidates without causing a cycle.
    if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
                                                  RootNode)) {
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + NumStoresToMerge);
      NumConsecutiveStores -= NumStoresToMerge;
      continue;
    }

    MadeChange |= mergeStoresOfConstantsOrVecElts(
        StoreNodes, MemVT, NumStoresToMerge, false, true, false);

    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
    NumConsecutiveStores -= NumStoresToMerge;
  }
  return MadeChange;
}
16516 
16517 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
16518                                        unsigned NumConsecutiveStores, EVT MemVT,
16519                                        SDNode *RootNode, bool AllowVectors,
16520                                        bool IsNonTemporalStore,
16521                                        bool IsNonTemporalLoad) {
16522   LLVMContext &Context = *DAG.getContext();
16523   const DataLayout &DL = DAG.getDataLayout();
16524   int64_t ElementSizeBytes = MemVT.getStoreSize();
16525   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16526   bool MadeChange = false;
16527 
16528   int64_t StartAddress = StoreNodes[0].OffsetFromBase;
16529 
16530   // Look for load nodes which are used by the stored values.
16531   SmallVector<MemOpLink, 8> LoadNodes;
16532 
16533   // Find acceptable loads. Loads need to have the same chain (token factor),
16534   // must not be zext, volatile, indexed, and they must be consecutive.
16535   BaseIndexOffset LdBasePtr;
16536 
16537   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16538     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
16539     SDValue Val = peekThroughBitcasts(St->getValue());
16540     LoadSDNode *Ld = cast<LoadSDNode>(Val);
16541 
16542     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
16543     // If this is not the first ptr that we check.
16544     int64_t LdOffset = 0;
16545     if (LdBasePtr.getBase().getNode()) {
16546       // The base ptr must be the same.
16547       if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
16548         break;
16549     } else {
16550       // Check that all other base pointers are the same as this one.
16551       LdBasePtr = LdPtr;
16552     }
16553 
16554     // We found a potential memory operand to merge.
16555     LoadNodes.push_back(MemOpLink(Ld, LdOffset));
16556   }
16557 
16558   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
16559     Align RequiredAlignment;
16560     bool NeedRotate = false;
16561     if (LoadNodes.size() == 2) {
16562       // If we have load/store pair instructions and we only have two values,
16563       // don't bother merging.
16564       if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
16565           StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
16566         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
16567         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
16568         break;
16569       }
16570       // If the loads are reversed, see if we can rotate the halves into place.
16571       int64_t Offset0 = LoadNodes[0].OffsetFromBase;
16572       int64_t Offset1 = LoadNodes[1].OffsetFromBase;
16573       EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
16574       if (Offset0 - Offset1 == ElementSizeBytes &&
16575           (hasOperation(ISD::ROTL, PairVT) ||
16576            hasOperation(ISD::ROTR, PairVT))) {
16577         std::swap(LoadNodes[0], LoadNodes[1]);
16578         NeedRotate = true;
16579       }
16580     }
16581     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16582     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16583     unsigned FirstStoreAlign = FirstInChain->getAlignment();
16584     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
16585 
16586     // Scan the memory operations on the chain and find the first
16587     // non-consecutive load memory address. These variables hold the index in
16588     // the store node array.
16589 
16590     unsigned LastConsecutiveLoad = 1;
16591 
16592     // This variable refers to the size and not index in the array.
16593     unsigned LastLegalVectorType = 1;
16594     unsigned LastLegalIntegerType = 1;
16595     bool isDereferenceable = true;
16596     bool DoIntegerTruncate = false;
16597     StartAddress = LoadNodes[0].OffsetFromBase;
16598     SDValue LoadChain = FirstLoad->getChain();
16599     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
16600       // All loads must share the same chain.
16601       if (LoadNodes[i].MemNode->getChain() != LoadChain)
16602         break;
16603 
16604       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
16605       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
16606         break;
16607       LastConsecutiveLoad = i;
16608 
16609       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
16610         isDereferenceable = false;
16611 
16612       // Find a legal type for the vector store.
16613       unsigned Elts = (i + 1) * NumMemElts;
16614       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16615 
16616       // Break early when size is too large to be legal.
16617       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
16618         break;
16619 
16620       bool IsFastSt = false;
16621       bool IsFastLd = false;
16622       if (TLI.isTypeLegal(StoreTy) &&
16623           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16624           TLI.allowsMemoryAccess(Context, DL, StoreTy,
16625                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
16626           IsFastSt &&
16627           TLI.allowsMemoryAccess(Context, DL, StoreTy,
16628                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
16629           IsFastLd) {
16630         LastLegalVectorType = i + 1;
16631       }
16632 
16633       // Find a legal type for the integer store.
16634       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
16635       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
16636       if (TLI.isTypeLegal(StoreTy) &&
16637           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16638           TLI.allowsMemoryAccess(Context, DL, StoreTy,
16639                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
16640           IsFastSt &&
16641           TLI.allowsMemoryAccess(Context, DL, StoreTy,
16642                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
16643           IsFastLd) {
16644         LastLegalIntegerType = i + 1;
16645         DoIntegerTruncate = false;
16646         // Or check whether a truncstore and extload is legal.
16647       } else if (TLI.getTypeAction(Context, StoreTy) ==
16648                  TargetLowering::TypePromoteInteger) {
16649         EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
16650         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
16651             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
16652             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
16653             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
16654             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
16655             TLI.allowsMemoryAccess(Context, DL, StoreTy,
16656                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
16657             IsFastSt &&
16658             TLI.allowsMemoryAccess(Context, DL, StoreTy,
16659                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
16660             IsFastLd) {
16661           LastLegalIntegerType = i + 1;
16662           DoIntegerTruncate = true;
16663         }
16664       }
16665     }
16666 
16667     // Only use vector types if the vector type is larger than the integer
16668     // type. If they are the same, use integers.
16669     bool UseVectorTy =
16670         LastLegalVectorType > LastLegalIntegerType && AllowVectors;
16671     unsigned LastLegalType =
16672         std::max(LastLegalVectorType, LastLegalIntegerType);
16673 
16674     // We add +1 here because the LastXXX variables refer to location while
16675     // the NumElem refers to array/index size.
16676     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
16677     NumElem = std::min(LastLegalType, NumElem);
16678     unsigned FirstLoadAlign = FirstLoad->getAlignment();
16679 
16680     if (NumElem < 2) {
16681       // We know that candidate stores are in order and of correct
16682       // shape. While there is no mergeable sequence from the
16683       // beginning one may start later in the sequence. The only
16684       // reason a merge of size N could have failed where another of
16685       // the same size would not have is if the alignment or either
16686       // the load or store has improved. Drop as many candidates as we
16687       // can here.
16688       unsigned NumSkip = 1;
16689       while ((NumSkip < LoadNodes.size()) &&
16690              (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
16691              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16692         NumSkip++;
16693       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16694       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
16695       NumConsecutiveStores -= NumSkip;
16696       continue;
16697     }
16698 
16699     // Check that we can merge these candidates without causing a cycle.
16700     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
16701                                                   RootNode)) {
16702       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16703       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16704       NumConsecutiveStores -= NumElem;
16705       continue;
16706     }
16707 
16708     // Find if it is better to use vectors or integers to load and store
16709     // to memory.
16710     EVT JointMemOpVT;
16711     if (UseVectorTy) {
16712       // Find a legal type for the vector store.
16713       unsigned Elts = NumElem * NumMemElts;
16714       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16715     } else {
16716       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
16717       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
16718     }
16719 
16720     SDLoc LoadDL(LoadNodes[0].MemNode);
16721     SDLoc StoreDL(StoreNodes[0].MemNode);
16722 
16723     // The merged loads are required to have the same incoming chain, so
16724     // using the first's chain is acceptable.
16725 
16726     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
16727     AddToWorklist(NewStoreChain.getNode());
16728 
16729     MachineMemOperand::Flags LdMMOFlags =
16730         isDereferenceable ? MachineMemOperand::MODereferenceable
16731                           : MachineMemOperand::MONone;
16732     if (IsNonTemporalLoad)
16733       LdMMOFlags |= MachineMemOperand::MONonTemporal;
16734 
16735     MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
16736                                               ? MachineMemOperand::MONonTemporal
16737                                               : MachineMemOperand::MONone;
16738 
16739     SDValue NewLoad, NewStore;
16740     if (UseVectorTy || !DoIntegerTruncate) {
16741       NewLoad = DAG.getLoad(
16742           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
16743           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
16744       SDValue StoreOp = NewLoad;
16745       if (NeedRotate) {
16746         unsigned LoadWidth = ElementSizeBytes * 8 * 2;
16747         assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
16748                "Unexpected type for rotate-able load pair");
16749         SDValue RotAmt =
16750             DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
16751         // Target can convert to the identical ROTR if it does not have ROTL.
16752         StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
16753       }
16754       NewStore = DAG.getStore(
16755           NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
16756           FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
16757     } else { // This must be the truncstore/extload case
16758       EVT ExtendedTy =
16759           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
16760       NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
16761                                FirstLoad->getChain(), FirstLoad->getBasePtr(),
16762                                FirstLoad->getPointerInfo(), JointMemOpVT,
16763                                FirstLoadAlign, LdMMOFlags);
16764       NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
16765                                    FirstInChain->getBasePtr(),
16766                                    FirstInChain->getPointerInfo(), JointMemOpVT,
16767                                    FirstInChain->getAlignment(),
16768                                    FirstInChain->getMemOperand()->getFlags());
16769     }
16770 
16771     // Transfer chain users from old loads to the new load.
16772     for (unsigned i = 0; i < NumElem; ++i) {
16773       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
16774       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
16775                                     SDValue(NewLoad.getNode(), 1));
16776     }
16777 
16778     // Replace all stores with the new store. Recursively remove corresponding
16779     // values if they are no longer used.
16780     for (unsigned i = 0; i < NumElem; ++i) {
16781       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
16782       CombineTo(StoreNodes[i].MemNode, NewStore);
16783       if (Val.getNode()->use_empty())
16784         recursivelyDeleteUnusedNodes(Val.getNode());
16785     }
16786 
16787     MadeChange = true;
16788     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16789     LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16790     NumConsecutiveStores -= NumElem;
16791   }
16792   return MadeChange;
16793 }
16794 
16795 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
16796   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
16797     return false;
16798 
16799   // TODO: Extend this function to merge stores of scalable vectors.
16800   // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
16801   // store since we know <vscale x 16 x i8> is exactly twice as large as
16802   // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
16803   EVT MemVT = St->getMemoryVT();
16804   if (MemVT.isScalableVector())
16805     return false;
16806   if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
16807     return false;
16808 
16809   // This function cannot currently deal with non-byte-sized memory sizes.
16810   int64_t ElementSizeBytes = MemVT.getStoreSize();
16811   if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
16812     return false;
16813 
16814   // Do not bother looking at stored values that are not constants, loads, or
16815   // extracted vector elements.
16816   SDValue StoredVal = peekThroughBitcasts(St->getValue());
16817   const StoreSource StoreSrc = getStoreSource(StoredVal);
16818   if (StoreSrc == StoreSource::Unknown)
16819     return false;
16820 
16821   SmallVector<MemOpLink, 8> StoreNodes;
16822   SDNode *RootNode;
16823   // Find potential store merge candidates by searching through chain sub-DAG
16824   getStoreMergeCandidates(St, StoreNodes, RootNode);
16825 
16826   // Check if there is anything to merge.
16827   if (StoreNodes.size() < 2)
16828     return false;
16829 
16830   // Sort the memory operands according to their distance from the
16831   // base pointer.
16832   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
16833     return LHS.OffsetFromBase < RHS.OffsetFromBase;
16834   });
16835 
16836   bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
16837       Attribute::NoImplicitFloat);
16838   bool IsNonTemporalStore = St->isNonTemporal();
16839   bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
16840                            cast<LoadSDNode>(StoredVal)->isNonTemporal();
16841 
16842   // Store Merge attempts to merge the lowest stores. This generally
16843   // works out as if successful, as the remaining stores are checked
16844   // after the first collection of stores is merged. However, in the
16845   // case that a non-mergeable store is found first, e.g., {p[-2],
16846   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
16847   // mergeable cases. To prevent this, we prune such stores from the
16848   // front of StoreNodes here.
16849   bool MadeChange = false;
16850   while (StoreNodes.size() > 1) {
16851     unsigned NumConsecutiveStores =
16852         getConsecutiveStores(StoreNodes, ElementSizeBytes);
16853     // There are no more stores in the list to examine.
16854     if (NumConsecutiveStores == 0)
16855       return MadeChange;
16856 
16857     // We have at least 2 consecutive stores. Try to merge them.
16858     assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
16859     switch (StoreSrc) {
16860     case StoreSource::Constant:
16861       MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
16862                                              MemVT, RootNode, AllowVectors);
16863       break;
16864 
16865     case StoreSource::Extract:
16866       MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
16867                                             MemVT, RootNode);
16868       break;
16869 
16870     case StoreSource::Load:
16871       MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
16872                                          MemVT, RootNode, AllowVectors,
16873                                          IsNonTemporalStore, IsNonTemporalLoad);
16874       break;
16875 
16876     default:
16877       llvm_unreachable("Unhandled store source type");
16878     }
16879   }
16880   return MadeChange;
16881 }
16882 
16883 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
16884   SDLoc SL(ST);
16885   SDValue ReplStore;
16886 
16887   // Replace the chain to avoid dependency.
16888   if (ST->isTruncatingStore()) {
16889     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
16890                                   ST->getBasePtr(), ST->getMemoryVT(),
16891                                   ST->getMemOperand());
16892   } else {
16893     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
16894                              ST->getMemOperand());
16895   }
16896 
16897   // Create token to keep both nodes around.
16898   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
16899                               MVT::Other, ST->getChain(), ReplStore);
16900 
16901   // Make sure the new and old chains are cleaned up.
16902   AddToWorklist(Token.getNode());
16903 
16904   // Don't add users to work list.
16905   return CombineTo(ST, Token, false);
16906 }
16907 
16908 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
16909   SDValue Value = ST->getValue();
16910   if (Value.getOpcode() == ISD::TargetConstantFP)
16911     return SDValue();
16912 
16913   if (!ISD::isNormalStore(ST))
16914     return SDValue();
16915 
16916   SDLoc DL(ST);
16917 
16918   SDValue Chain = ST->getChain();
16919   SDValue Ptr = ST->getBasePtr();
16920 
16921   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
16922 
16923   // NOTE: If the original store is volatile, this transform must not increase
16924   // the number of stores.  For example, on x86-32 an f64 can be stored in one
16925   // processor operation but an i64 (which is not legal) requires two.  So the
16926   // transform should not be done in this case.
16927 
16928   SDValue Tmp;
16929   switch (CFP->getSimpleValueType(0).SimpleTy) {
16930   default:
16931     llvm_unreachable("Unknown FP type");
16932   case MVT::f16:    // We don't do this for these yet.
16933   case MVT::f80:
16934   case MVT::f128:
16935   case MVT::ppcf128:
16936     return SDValue();
16937   case MVT::f32:
16938     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
16939         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16940       ;
16941       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
16942                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
16943                             MVT::i32);
16944       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
16945     }
16946 
16947     return SDValue();
16948   case MVT::f64:
16949     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
16950          ST->isSimple()) ||
16951         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
16952       ;
16953       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
16954                             getZExtValue(), SDLoc(CFP), MVT::i64);
16955       return DAG.getStore(Chain, DL, Tmp,
16956                           Ptr, ST->getMemOperand());
16957     }
16958 
16959     if (ST->isSimple() &&
16960         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16961       // Many FP stores are not made apparent until after legalize, e.g. for
16962       // argument passing.  Since this is so common, custom legalize the
16963       // 64-bit integer store into two 32-bit stores.
16964       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
16965       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
16966       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
16967       if (DAG.getDataLayout().isBigEndian())
16968         std::swap(Lo, Hi);
16969 
16970       unsigned Alignment = ST->getAlignment();
16971       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16972       AAMDNodes AAInfo = ST->getAAInfo();
16973 
16974       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16975                                  ST->getAlignment(), MMOFlags, AAInfo);
16976       Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL);
16977       Alignment = MinAlign(Alignment, 4U);
16978       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
16979                                  ST->getPointerInfo().getWithOffset(4),
16980                                  Alignment, MMOFlags, AAInfo);
16981       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
16982                          St0, St1);
16983     }
16984 
16985     return SDValue();
16986   }
16987 }
16988 
// Main combine entry point for ISD::STORE nodes. Tries a sequence of
// store-specific transforms in a deliberate order (e.g. store merging is
// attempted before FP-constant replacement so type changes don't block
// merging); the first transform that fires returns.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // If the store is volatile, we only want to change the store type if the
    // resulting store is legal. Otherwise we might increase the number of
    // memory accesses. We don't care if the original type was legal or not
    // as we assume software couldn't rely on the number of accesses of an
    // illegal type.
    // TODO: May be able to relax for unordered atomics (see D66309)
    if (((!LegalOperations && ST->isSimple()) ||
         TLI.isOperationLegal(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
                                     DAG, *ST->getMemOperand())) {
      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                          ST->getMemOperand());
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
    if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
      if (*Alignment > ST->getAlign() &&
          isAligned(*Alignment, ST->getSrcValueOffset())) {
        // Rebuilding the store with the better alignment CSEs to the same
        // node, so this only updates N in place rather than replacing it.
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), *Alignment,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N as we are only refining the alignment
        assert(NewStore.getNode() == N);
        (void)NewStore;
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  // Try transforming several stores into STORE (BSWAP).
  if (SDValue Store = MatchStoreCombine(ST))
    return Store;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // findBetterNeighborChains may have updated the store's chain operand.
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger() &&
      (!isa<ConstantSDNode>(Value) ||
       !cast<ConstantSDNode>(Value)->isOpaque())) {
    // Only the low MemoryVT bits of the value actually reach memory.
    APInt TruncDemandedBits =
        APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                             ST->getMemoryVT().getScalarSizeInBits());

    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    AddToWorklist(Value.getNode());
    if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
                               ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted) SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  // TODO: Can relax for unordered atomics (see D66309)
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && ST->isSimple() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // TODO: Can relax for unordered atomics (see D66309)
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && ST->isSimple() &&
        ST1->isUnindexed() && ST1->isSimple()) {
      if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
          ST->getMemoryVT() == ST1->getMemoryVT()) {
        // If this is a store followed by a store with the same value to the
        // same location, then the store is dead/noop.
        return Chain;
      }

      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef() &&
          // BaseIndexOffset and the code below requires knowing the size
          // of a vector, so bail out if MemoryVT is scalable.
          !ST1->getMemoryVT().isScalableVector()) {
        const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
        const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
        unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
        unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
        // If this is a store who's preceding store to a subset of the current
        // location and no one other node is chained to that store we can
        // effectively drop the store. Do not remove stores to undef as they may
        // be used as data sinks.
        if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
          CombineTo(ST1, ST1->getChain());
          return SDValue();
        }
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Always perform this optimization before types are legal. If the target
  // prefers, also try this after legalization to catch stores that were created
  // by intrinsics or other nodes.
  if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
    while (true) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = mergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  // Split a merged (hi<<N | zext lo) value store into two narrower stores
  // when the target prefers it.
  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  // Last resort: try narrowing the store width.
  return ReduceLoadOpStoreWidth(N);
}
17177 
17178 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
17179   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
17180   if (!LifetimeEnd->hasOffset())
17181     return SDValue();
17182 
17183   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
17184                                         LifetimeEnd->getOffset(), false);
17185 
17186   // We walk up the chains to find stores.
17187   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
17188   while (!Chains.empty()) {
17189     SDValue Chain = Chains.back();
17190     Chains.pop_back();
17191     if (!Chain.hasOneUse())
17192       continue;
17193     switch (Chain.getOpcode()) {
17194     case ISD::TokenFactor:
17195       for (unsigned Nops = Chain.getNumOperands(); Nops;)
17196         Chains.push_back(Chain.getOperand(--Nops));
17197       break;
17198     case ISD::LIFETIME_START:
17199     case ISD::LIFETIME_END:
17200       // We can forward past any lifetime start/end that can be proven not to
17201       // alias the node.
17202       if (!isAlias(Chain.getNode(), N))
17203         Chains.push_back(Chain.getOperand(0));
17204       break;
17205     case ISD::STORE: {
17206       StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
17207       // TODO: Can relax for unordered atomics (see D66309)
17208       if (!ST->isSimple() || ST->isIndexed())
17209         continue;
17210       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
17211       // If we store purely within object bounds just before its lifetime ends,
17212       // we can remove the store.
17213       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
17214                                    ST->getMemoryVT().getStoreSizeInBits())) {
17215         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
17216                    dbgs() << "\nwithin LIFETIME_END of : ";
17217                    LifetimeEndBase.dump(); dbgs() << "\n");
17218         CombineTo(ST, ST->getChain());
17219         return SDValue(N, 0);
17220       }
17221     }
17222     }
17223   }
17224   return SDValue();
17225 }
17226 
17227 /// For the instruction sequence of store below, F and I values
17228 /// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
17230 /// which can remove the bitwise instructions or sink them to colder places.
17231 ///
17232 ///   (store (or (zext (bitcast F to i32) to i64),
17233 ///              (shl (zext I to i64), 32)), addr)  -->
17234 ///   (store F, addr) and (store I, addr+4)
17235 ///
17236 /// Similarly, splitting for other merged store can also be beneficial, like:
17237 /// For pair of {i32, i32}, i64 store --> two i32 stores.
17238 /// For pair of {i32, i16}, i64 store --> two i32 stores.
17239 /// For pair of {i16, i16}, i32 store --> two i16 stores.
17240 /// For pair of {i16, i8},  i32 store --> two i16 stores.
17241 /// For pair of {i8, i8},   i16 store --> two i8 stores.
17242 ///
17243 /// We allow each target to determine specifically which kind of splitting is
17244 /// supported.
17245 ///
17246 /// The store patterns are commonly seen from the simple code snippet below
17247 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
17248 ///   void goo(const std::pair<int, float> &);
17249 ///   hoo() {
17250 ///     ...
17251 ///     goo(std::make_pair(tmp, ftmp));
17252 ///     ...
17253 ///   }
17254 ///
17255 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
17256   if (OptLevel == CodeGenOpt::None)
17257     return SDValue();
17258 
17259   // Can't change the number of memory accesses for a volatile store or break
17260   // atomicity for an atomic one.
17261   if (!ST->isSimple())
17262     return SDValue();
17263 
17264   SDValue Val = ST->getValue();
17265   SDLoc DL(ST);
17266 
17267   // Match OR operand.
17268   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
17269     return SDValue();
17270 
17271   // Match SHL operand and get Lower and Higher parts of Val.
17272   SDValue Op1 = Val.getOperand(0);
17273   SDValue Op2 = Val.getOperand(1);
17274   SDValue Lo, Hi;
17275   if (Op1.getOpcode() != ISD::SHL) {
17276     std::swap(Op1, Op2);
17277     if (Op1.getOpcode() != ISD::SHL)
17278       return SDValue();
17279   }
17280   Lo = Op2;
17281   Hi = Op1.getOperand(0);
17282   if (!Op1.hasOneUse())
17283     return SDValue();
17284 
17285   // Match shift amount to HalfValBitSize.
17286   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
17287   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
17288   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
17289     return SDValue();
17290 
17291   // Lo and Hi are zero-extended from int with size less equal than 32
17292   // to i64.
17293   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
17294       !Lo.getOperand(0).getValueType().isScalarInteger() ||
17295       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
17296       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
17297       !Hi.getOperand(0).getValueType().isScalarInteger() ||
17298       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
17299     return SDValue();
17300 
17301   // Use the EVT of low and high parts before bitcast as the input
17302   // of target query.
17303   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
17304                   ? Lo.getOperand(0).getValueType()
17305                   : Lo.getValueType();
17306   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
17307                    ? Hi.getOperand(0).getValueType()
17308                    : Hi.getValueType();
17309   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
17310     return SDValue();
17311 
17312   // Start to split store.
17313   unsigned Alignment = ST->getAlignment();
17314   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
17315   AAMDNodes AAInfo = ST->getAAInfo();
17316 
17317   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
17318   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
17319   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
17320   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
17321 
17322   SDValue Chain = ST->getChain();
17323   SDValue Ptr = ST->getBasePtr();
17324   // Lower value store.
17325   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
17326                              ST->getAlignment(), MMOFlags, AAInfo);
17327   Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL);
17328   // Higher value store.
17329   SDValue St1 =
17330       DAG.getStore(St0, DL, Hi, Ptr,
17331                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
17332                    Alignment / 2, MMOFlags, AAInfo);
17333   return St1;
17334 }
17335 
17336 /// Convert a disguised subvector insertion into a shuffle:
17337 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
17338   assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
17339          "Expected extract_vector_elt");
17340   SDValue InsertVal = N->getOperand(1);
17341   SDValue Vec = N->getOperand(0);
17342 
17343   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
17344   // InsIndex)
17345   //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
17346   //   CONCAT_VECTORS.
17347   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
17348       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17349       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
17350     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
17351     ArrayRef<int> Mask = SVN->getMask();
17352 
17353     SDValue X = Vec.getOperand(0);
17354     SDValue Y = Vec.getOperand(1);
17355 
17356     // Vec's operand 0 is using indices from 0 to N-1 and
17357     // operand 1 from N to 2N - 1, where N is the number of
17358     // elements in the vectors.
17359     SDValue InsertVal0 = InsertVal.getOperand(0);
17360     int ElementOffset = -1;
17361 
17362     // We explore the inputs of the shuffle in order to see if we find the
17363     // source of the extract_vector_elt. If so, we can use it to modify the
17364     // shuffle rather than perform an insert_vector_elt.
17365     SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
17366     ArgWorkList.emplace_back(Mask.size(), Y);
17367     ArgWorkList.emplace_back(0, X);
17368 
17369     while (!ArgWorkList.empty()) {
17370       int ArgOffset;
17371       SDValue ArgVal;
17372       std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
17373 
17374       if (ArgVal == InsertVal0) {
17375         ElementOffset = ArgOffset;
17376         break;
17377       }
17378 
17379       // Peek through concat_vector.
17380       if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
17381         int CurrentArgOffset =
17382             ArgOffset + ArgVal.getValueType().getVectorNumElements();
17383         int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
17384         for (SDValue Op : reverse(ArgVal->ops())) {
17385           CurrentArgOffset -= Step;
17386           ArgWorkList.emplace_back(CurrentArgOffset, Op);
17387         }
17388 
17389         // Make sure we went through all the elements and did not screw up index
17390         // computation.
17391         assert(CurrentArgOffset == ArgOffset);
17392       }
17393     }
17394 
17395     if (ElementOffset != -1) {
17396       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
17397 
17398       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
17399       NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
17400       assert(NewMask[InsIndex] <
17401                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
17402              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
17403 
17404       SDValue LegalShuffle =
17405               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
17406                                           Y, NewMask, DAG);
17407       if (LegalShuffle)
17408         return LegalShuffle;
17409     }
17410   }
17411 
17412   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
17413   // bitcast(shuffle (bitcast V), (extended X), Mask)
17414   // Note: We do not use an insert_subvector node because that requires a
17415   // legal subvector type.
17416   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
17417       !InsertVal.getOperand(0).getValueType().isVector())
17418     return SDValue();
17419 
17420   SDValue SubVec = InsertVal.getOperand(0);
17421   SDValue DestVec = N->getOperand(0);
17422   EVT SubVecVT = SubVec.getValueType();
17423   EVT VT = DestVec.getValueType();
17424   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
17425   // If the source only has a single vector element, the cost of creating adding
17426   // it to a vector is likely to exceed the cost of a insert_vector_elt.
17427   if (NumSrcElts == 1)
17428     return SDValue();
17429   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
17430   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
17431 
17432   // Step 1: Create a shuffle mask that implements this insert operation. The
17433   // vector that we are inserting into will be operand 0 of the shuffle, so
17434   // those elements are just 'i'. The inserted subvector is in the first
17435   // positions of operand 1 of the shuffle. Example:
17436   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
17437   SmallVector<int, 16> Mask(NumMaskVals);
17438   for (unsigned i = 0; i != NumMaskVals; ++i) {
17439     if (i / NumSrcElts == InsIndex)
17440       Mask[i] = (i % NumSrcElts) + NumMaskVals;
17441     else
17442       Mask[i] = i;
17443   }
17444 
17445   // Bail out if the target can not handle the shuffle we want to create.
17446   EVT SubVecEltVT = SubVecVT.getVectorElementType();
17447   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
17448   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
17449     return SDValue();
17450 
17451   // Step 2: Create a wide vector from the inserted source vector by appending
17452   // undefined elements. This is the same size as our destination vector.
17453   SDLoc DL(N);
17454   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
17455   ConcatOps[0] = SubVec;
17456   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
17457 
17458   // Step 3: Shuffle in the padded subvector.
17459   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
17460   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
17461   AddToWorklist(PaddedSubV.getNode());
17462   AddToWorklist(DestVecBC.getNode());
17463   AddToWorklist(Shuf.getNode());
17464   return DAG.getBitcast(VT, Shuf);
17465 }
17466 
/// Visit an INSERT_VECTOR_ELT node and try to fold it away: drop
/// out-of-bounds or redundant inserts, splat variable-index inserts into
/// undef, convert to a shuffle, canonicalize chained inserts by index, or
/// absorb the insert into a BUILD_VECTOR. Returns an empty SDValue when no
/// fold applies.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc DL(N);

  EVT VT = InVec.getValueType();
  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);

  // Insert into out-of-bounds element is undefined.
  if (IndexC && VT.isFixedLengthVector() &&
      IndexC->getZExtValue() >= VT.getVectorNumElements())
    return DAG.getUNDEF(VT);

  // Remove redundant insertions:
  // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
    return InVec;

  if (!IndexC) {
    // If this is variable insert to undef vector, it might be better to splat:
    // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
    if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
      if (VT.isScalableVector())
        return DAG.getSplatVector(VT, DL, InVal);
      else {
        SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
        return DAG.getBuildVector(VT, DL, Ops);
      }
    }
    // Variable index with no applicable fold.
    return SDValue();
  }

  // Everything below assumes a fixed-width vector with a constant index.
  if (VT.isScalableVector())
    return SDValue();

  unsigned NumElts = VT.getVectorNumElements();

  // We must know which element is being inserted for folds below here.
  unsigned Elt = IndexC->getZExtValue();
  if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
    return Shuf;

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt = InVec.getConstantOperandVal(2);
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.isUndef()) {
    Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }
  assert(Ops.size() == NumElts && "Unexpected vector size");

  // Insert the element
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
  }

  // Return the new vector
  return DAG.getBuildVector(VT, DL, Ops);
}
17562 
/// Replace (extract_vector_elt (load addr), idx) with a narrow scalar load of
/// one element: load addr + idx * elt_size. Issues an extending load when the
/// extract's result type is wider than the vector element type, and rewires
/// both the extract's value and the original load's chain via
/// ReplaceAllUsesOfValuesWith.
SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                                  SDValue EltNo,
                                                  LoadSDNode *OriginalLoad) {
  // Callers only pass simple (non-volatile, non-atomic) loads.
  assert(OriginalLoad->isSimple());

  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  Align Alignment = OriginalLoad->getAlign();
  Align NewAlign = DAG.getDataLayout().getABITypeAlign(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  // Bail if the element load would require more alignment than the original
  // load provides, or if scalar loads of this type aren't available.
  if (NewAlign > Alignment ||
      !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
    ISD::NON_EXTLOAD : ISD::EXTLOAD;
  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
    return SDValue();

  Alignment = NewAlign;

  // Compute the address of the extracted element: base + idx * elt_size.
  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    // Discard the pointer info except the address space because the memory
    // operand can't represent this new access since the offset is variable.
    MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
  }
  NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
                          OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
                          Alignment, OriginalLoad->getMemOperand()->getFlags(),
                          OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Same-width or narrower result: plain load, then truncate or bitcast to
    // the extract's result type as needed.
    Load = DAG.getLoad(
        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
        OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklistWithUsers(Load.getNode());
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}
17648 
17649 /// Transform a vector binary operation into a scalar binary operation by moving
17650 /// the math/logic after an extract element of a vector.
17651 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
17652                                        bool LegalOperations) {
17653   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17654   SDValue Vec = ExtElt->getOperand(0);
17655   SDValue Index = ExtElt->getOperand(1);
17656   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17657   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
17658       Vec.getNode()->getNumValues() != 1)
17659     return SDValue();
17660 
17661   // Targets may want to avoid this to prevent an expensive register transfer.
17662   if (!TLI.shouldScalarizeBinop(Vec))
17663     return SDValue();
17664 
17665   // Extracting an element of a vector constant is constant-folded, so this
17666   // transform is just replacing a vector op with a scalar op while moving the
17667   // extract.
17668   SDValue Op0 = Vec.getOperand(0);
17669   SDValue Op1 = Vec.getOperand(1);
17670   if (isAnyConstantBuildVector(Op0, true) ||
17671       isAnyConstantBuildVector(Op1, true)) {
17672     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
17673     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
17674     SDLoc DL(ExtElt);
17675     EVT VT = ExtElt->getValueType(0);
17676     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
17677     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
17678     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
17679   }
17680 
17681   return SDValue();
17682 }
17683 
/// Visit an EXTRACT_VECTOR_ELT node and try to replace it with a cheaper
/// scalar value: the matching inserted/built element, a truncation of a
/// bitcast source, a scalarized binop, a shuffle-source extract, a demanded
/// elements/bits simplification, or (post-legalization) a narrow scalar load.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  SDValue VecOp = N->getOperand(0);
  SDValue Index = N->getOperand(1);
  EVT ScalarVT = N->getValueType(0);
  EVT VecVT = VecOp.getValueType();
  if (VecOp.isUndef())
    return DAG.getUNDEF(ScalarVT);

  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
  //
  // This only really matters if the index is non-constant since other combines
  // on the constant elements already work.
  SDLoc DL(N);
  if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
      Index == VecOp.getOperand(2)) {
    SDValue Elt = VecOp.getOperand(1);
    return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
  }

  // (vextract (scalar_to_vector val, 0) -> val
  if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Only 0'th element of SCALAR_TO_VECTOR is defined.
    if (DAG.isKnownNeverZero(Index))
      return DAG.getUNDEF(ScalarVT);

    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = VecOp.getOperand(0);
    if (InOp.getValueType() != ScalarVT) {
      assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
    }
    return InOp;
  }

  // extract_vector_elt of out-of-bounds element -> UNDEF
  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
  if (IndexC && VecVT.isFixedLengthVector() &&
      IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
    return DAG.getUNDEF(ScalarVT);

  // extract_vector_elt (build_vector x, y), 1 -> y
  // For SPLAT_VECTOR any index selects the same (operand 0) element.
  if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
       VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
      TLI.isTypeLegal(VecVT) &&
      (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
    assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
            VecVT.isFixedLengthVector()) &&
           "BUILD_VECTOR used for scalable vectors");
    unsigned IndexVal =
        VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
    SDValue Elt = VecOp.getOperand(IndexVal);
    EVT InEltVT = Elt.getValueType();

    // Sometimes build_vector's scalar input types do not match result type.
    if (ScalarVT == InEltVT)
      return Elt;

    // TODO: It may be useful to truncate if free if the build_vector implicitly
    // converts.
  }

  if (VecVT.isScalableVector())
    return SDValue();

  // All the code from this point onwards assumes fixed width vectors, but it's
  // possible that some of the combinations could be made to work for scalable
  // vectors too.
  unsigned NumElts = VecVT.getVectorNumElements();
  unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();

  // TODO: These transforms should not require the 'hasOneUse' restriction, but
  // there are regressions on multiple targets without it. We can end up with a
  // mess of scalar and vector code if we reduce only part of the DAG to scalar.
  if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
      VecOp.hasOneUse()) {
    // The vector index of the LSBs of the source depend on the endian-ness.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    unsigned ExtractIndex = IndexC->getZExtValue();
    // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
    unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
    SDValue BCSrc = VecOp.getOperand(0);
    if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
      return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);

    if (LegalTypes && BCSrc.getValueType().isInteger() &&
        BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
      // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
      // trunc i64 X to i32
      SDValue X = BCSrc.getOperand(0);
      assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
             "Extract element and scalar to vector can't change element type "
             "from FP to integer.");
      unsigned XBitWidth = X.getValueSizeInBits();
      BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;

      // An extract element return value type can be wider than its vector
      // operand element type. In that case, the high bits are undefined, so
      // it's possible that we may need to extend rather than truncate.
      if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
        assert(XBitWidth % VecEltBitWidth == 0 &&
               "Scalar bitwidth must be a multiple of vector element bitwidth");
        return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
      }
    }
  }

  if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
    return BO;

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
    auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
    // Find the new index to extract from.
    int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(ScalarVT);

    // Select the right vector half to extract from.
    SDValue SVInVec;
    if (OrigElt < (int)NumElts) {
      SVInVec = VecOp.getOperand(0);
    } else {
      SVInVec = VecOp.getOperand(1);
      OrigElt -= NumElts;
    }

    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != ScalarVT) {
        assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations ||
        // FIXME: Should really be just isOperationLegalOrCustom.
        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
        TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
                         DAG.getVectorIdxConstant(OrigElt, DL));
    }
  }

  // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
  // simplify it based on the (valid) extraction indices.
  if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
        return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
               Use->getOperand(0) == VecOp &&
               isa<ConstantSDNode>(Use->getOperand(1));
      })) {
    APInt DemandedElts = APInt::getNullValue(NumElts);
    for (SDNode *Use : VecOp->uses()) {
      auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
      if (CstElt->getAPIntValue().ult(NumElts))
        DemandedElts.setBit(CstElt->getZExtValue());
    }
    if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
      // We simplified the vector operand of this extract element. If this
      // extract is not dead, visit it again so it is folded properly.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
    APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
    if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
      // We simplified the vector operand of this extract element. If this
      // extract is not dead, visit it again so it is folded properly.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // Everything under here is trying to match an extract of a loaded value.
  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  bool BCNumEltsChanged = false;
  EVT ExtVT = VecVT.getVectorElementType();
  EVT LVT = ExtVT;
  if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
    return SDValue();

  if (VecOp.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!VecOp.hasOneUse())
      return SDValue();

    EVT BCVT = VecOp.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (NumElts != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    VecOp = VecOp.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // extract (vector load $addr), i --> load $addr + i * size
  // The hasPredecessor check avoids creating a cycle: the variable Index must
  // not depend on the load being replaced.
  if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
      ISD::isNormalLoad(VecOp.getNode()) &&
      !Index->hasPredecessor(VecOp.getNode())) {
    auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
    if (VecLoad && VecLoad->isSimple())
      return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations || !IndexC)
    return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
  int Elt = IndexC->getZExtValue();
  LoadSDNode *LN0 = nullptr;
  if (ISD::isNormalLoad(VecOp.getNode())) {
    LN0 = cast<LoadSDNode>(VecOp);
  } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
             VecOp.getOperand(0).getValueType() == ExtVT &&
             ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
    // Don't duplicate a load with other uses.
    if (!VecOp.hasOneUse())
      return SDValue();

    LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
  }
  if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
    // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
    // =>
    // (load $addr+1*size)

    // Don't duplicate a load with other uses.
    if (!VecOp.hasOneUse())
      return SDValue();

    // If the bit convert changed the number of elements, it is unsafe
    // to examine the mask.
    if (BCNumEltsChanged)
      return SDValue();

    // Select the input vector, guarding against out of range extract vector.
    // NOTE(review): the guard uses '>' rather than '>='; Elt == NumElts would
    // still index the mask — presumably unreachable because out-of-bounds
    // constant indices were already folded to UNDEF above; confirm.
    int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
    VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);

    if (VecOp.getOpcode() == ISD::BITCAST) {
      // Don't duplicate a load with other uses.
      if (!VecOp.hasOneUse())
        return SDValue();

      VecOp = VecOp.getOperand(0);
    }
    if (ISD::isNormalLoad(VecOp.getNode())) {
      LN0 = cast<LoadSDNode>(VecOp);
      Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
      Index = DAG.getConstant(Elt, DL, Index.getValueType());
    }
  } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
             VecVT.getVectorElementType() == ScalarVT &&
             (!LegalTypes ||
              TLI.isTypeLegal(
                  VecOp.getOperand(0).getValueType().getVectorElementType()))) {
    // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
    //      -> extract_vector_elt a, 0
    // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
    //      -> extract_vector_elt a, 1
    // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
    //      -> extract_vector_elt b, 0
    // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
    //      -> extract_vector_elt b, 1
    SDLoc SL(N);
    EVT ConcatVT = VecOp.getOperand(0).getValueType();
    unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
    SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
                                     Index.getValueType());

    SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
                              ConcatVT.getVectorElementType(),
                              ConcatOp, NewIdx);
    return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
  }

  // Make sure we found a non-volatile load and the extractelement is
  // the only use.
  if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
    return SDValue();

  // If Idx was -1 above, Elt is going to be -1, so just return undef.
  if (Elt == -1)
    return DAG.getUNDEF(LVT);

  return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
}
17991 
// Simplify (build_vec (ext )) to (bitcast (build_vec ))
//
// If every operand of the BUILD_VECTOR is an ANY_EXTEND or ZERO_EXTEND from a
// single common source type (undef operands are ignored), build a wider
// BUILD_VECTOR of the un-extended source scalars instead -- padding the extra
// lanes with undef (all any-extends) or zero (some zero-extends) -- and
// bitcast the result back to the original vector type. Returns a null SDValue
// if the pattern does not match or the new type would be illegal.
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other; // Common pre-extension type; MVT::Other = unset.
  bool AllAnyExt = true;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.isUndef()) continue;

    bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                 isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  // If we already have a splat buildvector, then don't fold it if it means
  // introducing zeros.
  if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
    return SDValue();

  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  // Lanes not holding a source scalar get undef (pure any-extend) or zero
  // (at least one zero-extend requires the high bits to be zero).
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, DL, SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.isUndef()) && "Invalid cast opcode");
    SDValue In;
    if (Cast.isUndef())
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    // Little-endian: the source scalar occupies the lowest sub-lane of each
    // group of ElemRatio lanes; big-endian: the highest sub-lane.
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT) ||
      (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
       TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
    return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getBitcast(VT, BV);
}
18109 
18110 // Simplify (build_vec (trunc $1)
18111 //                     (trunc (srl $1 half-width))
18112 //                     (trunc (srl $1 (2 * half-width))) …)
18113 // to (bitcast $1)
18114 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
18115   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
18116 
18117   // Only for little endian
18118   if (!DAG.getDataLayout().isLittleEndian())
18119     return SDValue();
18120 
18121   SDLoc DL(N);
18122   EVT VT = N->getValueType(0);
18123   EVT OutScalarTy = VT.getScalarType();
18124   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
18125 
18126   // Only for power of two types to be sure that bitcast works well
18127   if (!isPowerOf2_64(ScalarTypeBitsize))
18128     return SDValue();
18129 
18130   unsigned NumInScalars = N->getNumOperands();
18131 
18132   // Look through bitcasts
18133   auto PeekThroughBitcast = [](SDValue Op) {
18134     if (Op.getOpcode() == ISD::BITCAST)
18135       return Op.getOperand(0);
18136     return Op;
18137   };
18138 
18139   // The source value where all the parts are extracted.
18140   SDValue Src;
18141   for (unsigned i = 0; i != NumInScalars; ++i) {
18142     SDValue In = PeekThroughBitcast(N->getOperand(i));
18143     // Ignore undef inputs.
18144     if (In.isUndef()) continue;
18145 
18146     if (In.getOpcode() != ISD::TRUNCATE)
18147       return SDValue();
18148 
18149     In = PeekThroughBitcast(In.getOperand(0));
18150 
18151     if (In.getOpcode() != ISD::SRL) {
18152       // For now only build_vec without shuffling, handle shifts here in the
18153       // future.
18154       if (i != 0)
18155         return SDValue();
18156 
18157       Src = In;
18158     } else {
18159       // In is SRL
18160       SDValue part = PeekThroughBitcast(In.getOperand(0));
18161 
18162       if (!Src) {
18163         Src = part;
18164       } else if (Src != part) {
18165         // Vector parts do not stem from the same variable
18166         return SDValue();
18167       }
18168 
18169       SDValue ShiftAmtVal = In.getOperand(1);
18170       if (!isa<ConstantSDNode>(ShiftAmtVal))
18171         return SDValue();
18172 
18173       uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
18174 
18175       // The extracted value is not extracted at the right position
18176       if (ShiftAmt != i * ScalarTypeBitsize)
18177         return SDValue();
18178     }
18179   }
18180 
18181   // Only cast if the size is the same
18182   if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
18183     return SDValue();
18184 
18185   return DAG.getBitcast(VT, Src);
18186 }
18187 
/// Turn one "pair" of BUILD_VECTOR input vectors into a VECTOR_SHUFFLE that
/// places their extracted elements into the right output lanes.
///
/// \param N          The BUILD_VECTOR being combined; its operands are
///                   EXTRACT_VECTOR_ELTs with constant indices that become
///                   the shuffle mask entries.
/// \param VectorMask Maps each output lane to the number of the input vector
///                   it comes from (-1 = undef, 0 = zero vector, >0 = input).
/// \param VecIn1     First input vector; never a null SDValue.
/// \param VecIn2     Second input vector; may be a null SDValue.
/// \param LeftIdx    The VectorMask number corresponding to VecIn1 (VecIn2,
///                   if present, corresponds to LeftIdx + 1).
/// \param DidSplitVec True when the caller split one source vector into
///                   VecIn1/VecIn2, so extract indices in the operands are
///                   still relative to the original, unsplit vector.
/// \returns the shuffle, or a null SDValue if the types can't be reconciled.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx, bool DidSplitVec) {
  SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // If we artificially split a vector in two already, then the offsets in the
  // operands will all be based off of VecIn1, even those in VecIn2.
  unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getVectorIdxConstant(NumElems, DL));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        // We shuffle at the (wider) input width; the result is extracted
        // back down to VT at the end of the function.
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      // VecIn1 already matches the output width; widen only VecIn2 by
      // concatenating an undef upper half.
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    // Skip undef (-1) and zero-vector (0) lanes; they remain undef here.
    if (VectorMask[i] <= 0)
      continue;

    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}
18302 
18303 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
18304   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
18305 
18306   // First, determine where the build vector is not undef.
18307   // TODO: We could extend this to handle zero elements as well as undefs.
18308   int NumBVOps = BV->getNumOperands();
18309   int ZextElt = -1;
18310   for (int i = 0; i != NumBVOps; ++i) {
18311     SDValue Op = BV->getOperand(i);
18312     if (Op.isUndef())
18313       continue;
18314     if (ZextElt == -1)
18315       ZextElt = i;
18316     else
18317       return SDValue();
18318   }
18319   // Bail out if there's no non-undef element.
18320   if (ZextElt == -1)
18321     return SDValue();
18322 
18323   // The build vector contains some number of undef elements and exactly
18324   // one other element. That other element must be a zero-extended scalar
18325   // extracted from a vector at a constant index to turn this into a shuffle.
18326   // Also, require that the build vector does not implicitly truncate/extend
18327   // its elements.
18328   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
18329   EVT VT = BV->getValueType(0);
18330   SDValue Zext = BV->getOperand(ZextElt);
18331   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
18332       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
18333       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
18334       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
18335     return SDValue();
18336 
18337   // The zero-extend must be a multiple of the source size, and we must be
18338   // building a vector of the same size as the source of the extract element.
18339   SDValue Extract = Zext.getOperand(0);
18340   unsigned DestSize = Zext.getValueSizeInBits();
18341   unsigned SrcSize = Extract.getValueSizeInBits();
18342   if (DestSize % SrcSize != 0 ||
18343       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
18344     return SDValue();
18345 
18346   // Create a shuffle mask that will combine the extracted element with zeros
18347   // and undefs.
18348   int ZextRatio = DestSize / SrcSize;
18349   int NumMaskElts = NumBVOps * ZextRatio;
18350   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
18351   for (int i = 0; i != NumMaskElts; ++i) {
18352     if (i / ZextRatio == ZextElt) {
18353       // The low bits of the (potentially translated) extracted element map to
18354       // the source vector. The high bits map to zero. We will use a zero vector
18355       // as the 2nd source operand of the shuffle, so use the 1st element of
18356       // that vector (mask value is number-of-elements) for the high bits.
18357       if (i % ZextRatio == 0)
18358         ShufMask[i] = Extract.getConstantOperandVal(1);
18359       else
18360         ShufMask[i] = NumMaskElts;
18361     }
18362 
18363     // Undef elements of the build vector remain undef because we initialize
18364     // the shuffle mask with -1.
18365   }
18366 
18367   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
18368   // bitcast (shuffle V, ZeroVec, VectorMask)
18369   SDLoc DL(BV);
18370   EVT VecVT = Extract.getOperand(0).getValueType();
18371   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
18372   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18373   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
18374                                              ZeroVec, ShufMask, DAG);
18375   if (!Shuf)
18376     return SDValue();
18377   return DAG.getBitcast(VT, Shuf);
18378 }
18379 
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // Special case: one zero-extended extract element plus undefs can become a
  // shuffle with an all-zero vector.
  if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
    return V;

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.

  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  // Slot 0 is a placeholder for the zero vector, so real inputs start at 1.
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    SDValue ExtractedFromVec = Op.getOperand(0);

    // Shuffle masks are fixed-length; scalable source vectors can't be used.
    if (ExtractedFromVec.getValueType().isScalableVector())
      return SDValue();

    const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
      return SDValue();

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    unsigned Idx = std::distance(
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
    if (Idx == VecIn.size())
      VecIn.push_back(ExtractedFromVec);

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  if (VecIn.size() < 2)
    return SDValue();

  // If all the Operands of BUILD_VECTOR extract from same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
  bool DidSplitVec = false;
  if (VecIn.size() == 2) {
    unsigned MaxIndex = 0;
    unsigned NearestPow2 = 0;
    SDValue Vec = VecIn.back();
    EVT InVT = Vec.getValueType();
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);

    // Collect the extract index of every lane that reads from Vec and track
    // the highest one.
    for (unsigned i = 0; i < NumElems; i++) {
      if (VectorMask[i] <= 0)
        continue;
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
      IndexVec[i] = Index;
      MaxIndex = std::max(MaxIndex, Index);
    }

    NearestPow2 = PowerOf2Ceil(MaxIndex);
    // Split only when the source is much wider than the result, all accesses
    // fit below the rounded-up power-of-two, and the half size is legal.
    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
        NumElems * 2 < NearestPow2) {
      unsigned SplitSize = NearestPow2 / 2;
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
                                     InVT.getVectorElementType(), SplitSize);
      if (TLI.isTypeLegal(SplitVT)) {
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getVectorIdxConstant(SplitSize, DL));
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getVectorIdxConstant(0, DL));
        VecIn.pop_back();
        VecIn.push_back(VecIn1);
        VecIn.push_back(VecIn2);
        DidSplitVec = true;

        // Re-point each lane at the half it now extracts from.
        for (unsigned i = 0; i < NumElems; i++) {
          if (VectorMask[i] <= 0)
            continue;
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
        }
      }
    }
  }

  // TODO: We want to sort the vectors by descending length, so that adjacent
  // pairs have similar length, and the longer vector is always first in the
  // pair.

  // TODO: Should this fire if some of the input vectors has illegal type (like
  // it does now), or should we let legalization run its course first?

  // Shuffle phase:
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    // The "+ 1" skips the zero-vector placeholder in VecIn[0].
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx, DidSplitVec))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      // Zero-vector lanes now come from the zero constant appended last.
      Vec = Shuffles.size() - 1;
    else
      // Input pair (2k+1, 2k+2) was merged into shuffle k.
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    if (CurSize % 2) {
      // Pad odd levels with an undef so pairs always exist. The vector never
      // shrinks, so index CurSize is still in range.
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      // Blend: take each lane from whichever of the two children owns it.
      for (unsigned i = 0; i != NumElems; ++i) {
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }
  return Shuffles[0];
}
18595 
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
// TODO: Support sign extend?
// TODO: Allow undef elements?
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
  if (LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);

  bool FoundZeroExtend = false;
  SDValue Op0 = N->getOperand(0);
  // Returns the constant extract index if Op is
  // (zext/aext (extract_vector_elt X, C)) where X is the same source vector
  // as element 0's extract, or -1 otherwise. Side effect: records whether any
  // inspected element used ZERO_EXTEND (harmless on failing elements, since
  // the combine then bails out entirely).
  auto checkElem = [&](SDValue Op) -> int64_t {
    unsigned Opc = Op.getOpcode();
    FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
    if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
        Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
        return C->getZExtValue();
    return -1;
  };

  // Make sure the first element matches
  // (zext (extract_vector_elt X, C))
  int64_t Offset = checkElem(Op0);
  if (Offset < 0)
    return SDValue();

  unsigned NumElems = N->getNumOperands();
  SDValue In = Op0.getOperand(0).getOperand(0);
  EVT InSVT = In.getValueType().getScalarType();
  EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);

  // Don't create an illegal input type after type legalization.
  if (LegalTypes && !TLI.isTypeLegal(InVT))
    return SDValue();

  // Ensure all the elements come from the same vector and are adjacent.
  for (unsigned i = 1; i != NumElems; ++i) {
    if ((Offset + i) != checkElem(N->getOperand(i)))
      return SDValue();
  }

  SDLoc DL(N);
  // Extract the contiguous run starting at Offset, then extend the whole
  // subvector at once. ZERO_EXTEND is required if any element zero-extended;
  // otherwise ANY_EXTEND suffices.
  In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
                   Op0.getOperand(0).getOperand(1));
  return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
                     VT, In);
}
18646 
/// Combine a BUILD_VECTOR node by trying a sequence of folds in order;
/// returns the replacement value, or a null SDValue if nothing matched.
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // If this is a splat of a bitcast from another vector, change to a
  // concat_vector.
  // For example:
  //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
  //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
  //
  // If X is a build_vector itself, the concat can become a larger build_vector.
  // TODO: Maybe this is useful for non-splat too?
  if (!LegalOperations) {
    if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
      Splat = peekThroughBitcasts(Splat);
      EVT SrcVT = Splat.getValueType();
      if (SrcVT.isVector()) {
        unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
        EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
                                     SrcVT.getVectorElementType(), NumElts);
        if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
          SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
          SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
                                       NewVT, Ops);
          return DAG.getBitcast(VT, Concat);
        }
      }
    }
  }

  // A splat of a single element is a SPLAT_VECTOR if supported on the target.
  if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
    if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
      assert(!V.isUndef() && "Splat of undef should have been handled earlier");
      return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
    }

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    // Returns the constant extract index if Op is
    // (extract_vector_elt X, C) with the same source vector X as operand 0,
    // otherwise (uint64_t)-1.
    // NOTE(review): the lambda returns uint64_t while Offset below is an int;
    // the -1 sentinel only survives the mixed signed/unsigned comparisons via
    // wraparound -- confirm this is intended.
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    // All elements must extract adjacent lanes starting at Offset.
    int Offset = checkElem(Op0);
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    // Extracting every lane of an identically-typed vector is the vector
    // itself.
    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  if (SDValue V = convertBuildVecZextToZext(N))
    return V;

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecTruncToBitCast(N))
    return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  return SDValue();
}
18730 
/// Fold a CONCAT_VECTORS whose operands are all bitcasts of scalars (or
/// undef) into a BUILD_VECTOR of those scalars, bitcast to the concat's
/// result type. Only fires when the operand vector type is illegal.
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT OpVT = N->getOperand(0).getValueType();

  // If the operands are legal vectors, leave them alone.
  if (TLI.isTypeLegal(OpVT))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SmallVector<SDValue, 8> Ops;

  // Default scalar type: an integer as wide as one concat operand. Undef
  // operands become undef scalars of that type.
  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);

  // Keep track of what we encounter.
  bool AnyInteger = false;
  bool AnyFP = false;
  for (const SDValue &Op : N->ops()) {
    if (ISD::BITCAST == Op.getOpcode() &&
        !Op.getOperand(0).getValueType().isVector())
      Ops.push_back(Op.getOperand(0));
    else if (ISD::UNDEF == Op.getOpcode())
      Ops.push_back(ScalarUndef);
    else
      return SDValue();

    // Note whether we encounter an integer or floating point scalar.
    // If it's neither, bail out, it could be something weird like x86mmx.
    EVT LastOpVT = Ops.back().getValueType();
    if (LastOpVT.isFloatingPoint())
      AnyFP = true;
    else if (LastOpVT.isInteger())
      AnyInteger = true;
    else
      return SDValue();
  }

  // If any of the operands is a floating point scalar bitcast to a vector,
  // use floating point types throughout, and bitcast everything.
  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
  if (AnyFP) {
    SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
    ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
    // Only a second pass is needed when integer scalars must be retyped.
    if (AnyInteger) {
      for (SDValue &Op : Ops) {
        if (Op.getValueType() == SVT)
          continue;
        if (Op.isUndef())
          Op = ScalarUndef;
        else
          Op = DAG.getBitcast(SVT, Op);
      }
    }
  }

  // Build a vector of the chosen scalar type, then bitcast to the original
  // concat result type.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
                               VT.getSizeInBits() / SVT.getSizeInBits());
  return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}
18791 
18792 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
18793 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
18794 // most two distinct vectors the same size as the result, attempt to turn this
18795 // into a legal shuffle.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();

  // We currently can't generate an appropriate shuffle for a scalable vector.
  if (VT.isScalableVector())
    return SDValue();

  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 are the (at most) two distinct source vectors of the shuffle;
  // Mask accumulates NumOpElts entries per concat operand.
  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    Op = peekThroughBitcasts(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);
    int ExtIdx = Op.getConstantOperandVal(1);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();
    ExtVec = peekThroughBitcasts(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    // Indices into SV1 are offset by NumElts, per shuffle mask convention.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  // Bitcast the sources to the result type and let the target build (or
  // reject) a legal shuffle from the accumulated mask.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                                     DAG.getBitcast(VT, SV1), Mask, DAG);
}
18869 
/// Fold concat (cast X), (cast Y)... -> cast (concat X, Y...) when every
/// concat operand is the same int<->fp conversion from the same source type
/// and the wider cast is legal/custom for the target.
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
  unsigned CastOpcode = N->getOperand(0).getOpcode();
  switch (CastOpcode) {
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // TODO: Allow more opcodes?
    //  case ISD::BITCAST:
    //  case ISD::TRUNCATE:
    //  case ISD::ZERO_EXTEND:
    //  case ISD::SIGN_EXTEND:
    //  case ISD::FP_EXTEND:
    break;
  default:
    return SDValue();
  }

  EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
  if (!SrcVT.isVector())
    return SDValue();

  // All operands of the concat must be the same kind of cast from the same
  // source type.
  SmallVector<SDValue, 4> SrcOps;
  for (SDValue Op : N->ops()) {
    if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
        Op.getOperand(0).getValueType() != SrcVT)
      return SDValue();
    SrcOps.push_back(Op.getOperand(0));
  }

  // The wider cast must be supported by the target. This is unusual because
  // the operation support type parameter depends on the opcode. In addition,
  // check the other type in the cast to make sure this is really legal.
  EVT VT = N->getValueType(0);
  EVT SrcEltVT = SrcVT.getVectorElementType();
  unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands();
  EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  switch (CastOpcode) {
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    // int-to-fp legality is keyed on the (integer) source type.
    if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
        !TLI.isTypeLegal(VT))
      return SDValue();
    break;
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // fp-to-int legality is keyed on the (integer) result type.
    if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
        !TLI.isTypeLegal(ConcatSrcVT))
      return SDValue();
    break;
  default:
    llvm_unreachable("Unexpected cast opcode");
  }

  // concat (cast X), (cast Y)... -> cast (concat X, Y...)
  SDLoc DL(N);
  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
  return DAG.getNode(CastOpcode, DL, VT, NewConcat);
}
18932 
/// Simplify a CONCAT_VECTORS node by trying, in order: single-operand and
/// all-undef folds, operand-0-with-undef-padding folds, merging of
/// BUILD_VECTOR operands, the scalar/extract/cast concat combines, and
/// finally recognizing an identity concat-of-extracts of a single source.
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // If the input is a concat_vectors, just make a larger concat by padding
    // with smaller undefs.
    if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
      unsigned NumOps = N->getNumOperands() * In.getNumOperands();
      SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
      Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
    }

    SDValue Scalar = peekThroughOneUseBitcasts(In);

    // concat_vectors(scalar_to_vector(scalar), undef) ->
    //     scalar_to_vector(scalar)
    if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
         Scalar.hasOneUse()) {
      EVT SVT = Scalar.getValueType().getVectorElementType();
      if (SVT == Scalar.getOperand(0).getValueType())
        Scalar = Scalar.getOperand(0);
    }

    // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
    if (!Scalar.getValueType().isVector()) {
      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar.getValueType();

      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Bail out if the vector size is not a multiple of the scalar size.
      if (VT.getSizeInBits() % SclTy.getSizeInBits())
        return SDValue();

      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0).getValueType();
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      // The two ifs below are mutually exclusive: every operand is either
      // UNDEF or BUILD_VECTOR (checked above).
      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  if (SDValue V = combineConcatVectorOfCasts(N, DAG))
    return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
  // operands and look for a CONCAT operations that place the incoming vectors
  // at the exact same location.
  //
  // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem =
      N->getOperand(0).getValueType().getVectorMinNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    // Undef operands match any source element position.
    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    // Check that we are reading from the identity index.
    unsigned IdentityIndex = i * PartNumElem;
    if (Op.getConstantOperandAPInt(1) != IdentityIndex)
      return SDValue();
  }

  // Every non-undef operand re-assembles SingleSource in place, so the
  // concat is a no-op.
  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
19108 
19109 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
19110 // if the subvector can be sourced for free.
19111 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
19112   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
19113       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
19114     return V.getOperand(1);
19115   }
19116   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19117   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
19118       V.getOperand(0).getValueType() == SubVT &&
19119       (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
19120     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
19121     return V.getOperand(SubIdx);
19122   }
19123   return SDValue();
19124 }
19125 
19126 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
19127                                               SelectionDAG &DAG) {
19128   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19129   SDValue BinOp = Extract->getOperand(0);
19130   unsigned BinOpcode = BinOp.getOpcode();
19131   if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
19132     return SDValue();
19133 
19134   EVT VecVT = BinOp.getValueType();
19135   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
19136   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
19137     return SDValue();
19138 
19139   SDValue Index = Extract->getOperand(1);
19140   EVT SubVT = Extract->getValueType(0);
19141   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
19142     return SDValue();
19143 
19144   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
19145   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
19146 
19147   // TODO: We could handle the case where only 1 operand is being inserted by
19148   //       creating an extract of the other operand, but that requires checking
19149   //       number of uses and/or costs.
19150   if (!Sub0 || !Sub1)
19151     return SDValue();
19152 
19153   // We are inserting both operands of the wide binop only to extract back
19154   // to the narrow vector size. Eliminate all of the insert/extract:
19155   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
19156   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
19157                      BinOp->getFlags());
19158 }
19159 
19160 /// If we are extracting a subvector produced by a wide binary operator try
19161 /// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
  // some of these bailouts with other transforms.

  if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
    return V;

  // The extract index must be a constant, so we can map it to a concat operand.
  auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!ExtractIndexC)
    return SDValue();

  // We are looking for an optionally bitcasted wide vector binary operator
  // feeding an extract subvector.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
  unsigned BOpcode = BinOp.getOpcode();
  if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
    return SDValue();

  // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
  // reduced to the unary fneg when it is visited, and we probably want to deal
  // with fneg in a target-specific way.
  if (BOpcode == ISD::FSUB) {
    auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
    if (C && C->getValueAPF().isNegZero())
      return SDValue();
  }

  // The binop must be a vector type, so we can extract some fraction of it.
  EVT WideBVT = BinOp.getValueType();
  if (!WideBVT.isVector())
    return SDValue();

  EVT VT = Extract->getValueType(0);
  unsigned ExtractIndex = ExtractIndexC->getZExtValue();
  assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
         "Extract index is not a multiple of the vector length.");

  // Bail out if this is not a proper multiple width extraction.
  unsigned WideWidth = WideBVT.getSizeInBits();
  unsigned NarrowWidth = VT.getSizeInBits();
  if (WideWidth % NarrowWidth != 0)
    return SDValue();

  // Bail out if we are extracting a fraction of a single operation. This can
  // occur because we potentially looked through a bitcast of the binop.
  unsigned NarrowingRatio = WideWidth / NarrowWidth;
  unsigned WideNumElts = WideBVT.getVectorNumElements();
  if (WideNumElts % NarrowingRatio != 0)
    return SDValue();

  // Bail out if the target does not support a narrower version of the binop.
  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
                                   WideNumElts / NarrowingRatio);
  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
    return SDValue();

  // If extraction is cheap, we don't need to look at the binop operands
  // for concat ops. The narrow binop alone makes this transform profitable.
  // We can't just reuse the original extract index operand because we may have
  // bitcasted.
  unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
  if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
      BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
    // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
    SDLoc DL(Extract);
    SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
    SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                            BinOp.getOperand(0), NewExtIndex);
    SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                            BinOp.getOperand(1), NewExtIndex);
    SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
                                      BinOp.getNode()->getFlags());
    return DAG.getBitcast(VT, NarrowBinOp);
  }

  // Only handle the case where we are doubling and then halving. A larger ratio
  // may require more than two narrow binops to replace the wide binop.
  if (NarrowingRatio != 2)
    return SDValue();

  // TODO: The motivating case for this transform is an x86 AVX1 target. That
  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
  // flavors, but no other 256-bit integer support. This could be extended to
  // handle any binop, but that may require fixing/adding other folds to avoid
  // codegen regressions.
  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
    return SDValue();

  // We need at least one concatenation operation of a binop operand to make
  // this transform worthwhile. The concat must double the input vector sizes.
  // Returns the half of a 2-way concat selected by the extract index, if any.
  auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
    if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
      return V.getOperand(ConcatOpNum);
    return SDValue();
  };
  SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
  SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));

  if (SubVecL || SubVecR) {
    // If a binop operand was not the result of a concat, we must extract a
    // half-sized operand for our new narrow binop:
    // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
    // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
    // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
    SDLoc DL(Extract);
    SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
    SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
                        : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                      BinOp.getOperand(0), IndexC);

    SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
                        : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                      BinOp.getOperand(1), IndexC);

    SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
    return DAG.getBitcast(VT, NarrowBinOp);
  }

  return SDValue();
}
19285 
19286 /// If we are extracting a subvector from a wide vector load, convert to a
19287 /// narrow load to eliminate the extraction:
19288 /// (extract_subvector (load wide vector)) --> (load narrow vector)
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Add support for big-endian. The offset calculation must be adjusted.
  if (DAG.getDataLayout().isBigEndian())
    return SDValue();

  // Only handle simple (non-extending, non-atomic, non-volatile) loads with a
  // constant extract index.
  auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
  auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
      !ExtIdx)
    return SDValue();

  EVT VT = Extract->getValueType(0);

  // We can only create byte sized loads.
  if (!VT.isByteSized())
    return SDValue();

  unsigned Index = ExtIdx->getZExtValue();
  unsigned NumElts = VT.getVectorNumElements();

  // If the index is a multiple of the extract element count, we can offset the
  // address by the store size multiplied by the subvector index. Otherwise if
  // the scalar type is byte sized, we can just use the index multiplied by
  // the element size in bytes as the offset.
  unsigned Offset;
  if (Index % NumElts == 0)
    Offset = (Index / NumElts) * VT.getStoreSize();
  else if (VT.getScalarType().isByteSized())
    Offset = Index * VT.getScalarType().getStoreSize();
  else
    return SDValue();

  // Allow targets to opt-out.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
    return SDValue();

  // The narrow load will be offset from the base address of the old load if
  // we are extracting from something besides index 0 (little-endian).
  SDLoc DL(Extract);
  SDValue BaseAddr = Ld->getBasePtr();

  // TODO: Use "BaseIndexOffset" to make this more effective.
  SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
  // Derive the narrow load's memory operand from the wide one at the same
  // byte offset so alias info/ordering are preserved.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
                                                   VT.getStoreSize());
  SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
  DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
  return NewLd;
}
19340 
19341 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
19342   EVT NVT = N->getValueType(0);
19343   SDValue V = N->getOperand(0);
19344   uint64_t ExtIdx = N->getConstantOperandVal(1);
19345 
19346   // Extract from UNDEF is UNDEF.
19347   if (V.isUndef())
19348     return DAG.getUNDEF(NVT);
19349 
19350   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
19351     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
19352       return NarrowLoad;
19353 
19354   // Combine an extract of an extract into a single extract_subvector.
19355   // ext (ext X, C), 0 --> ext X, C
19356   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
19357     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
19358                                     V.getConstantOperandVal(1)) &&
19359         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
19360       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
19361                          V.getOperand(1));
19362     }
19363   }
19364 
19365   // Try to move vector bitcast after extract_subv by scaling extraction index:
19366   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
19367   if (V.getOpcode() == ISD::BITCAST &&
19368       V.getOperand(0).getValueType().isVector()) {
19369     SDValue SrcOp = V.getOperand(0);
19370     EVT SrcVT = SrcOp.getValueType();
19371     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
19372     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
19373     if ((SrcNumElts % DestNumElts) == 0) {
19374       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
19375       ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
19376       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
19377                                       NewExtEC);
19378       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
19379         SDLoc DL(N);
19380         SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
19381         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
19382                                          V.getOperand(0), NewIndex);
19383         return DAG.getBitcast(NVT, NewExtract);
19384       }
19385     }
19386     if ((DestNumElts % SrcNumElts) == 0) {
19387       unsigned DestSrcRatio = DestNumElts / SrcNumElts;
19388       if ((NVT.getVectorMinNumElements() % DestSrcRatio) == 0) {
19389         ElementCount NewExtEC = NVT.getVectorElementCount() / DestSrcRatio;
19390         EVT ScalarVT = SrcVT.getScalarType();
19391         if ((ExtIdx % DestSrcRatio) == 0) {
19392           SDLoc DL(N);
19393           unsigned IndexValScaled = ExtIdx / DestSrcRatio;
19394           EVT NewExtVT =
19395               EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
19396           if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
19397             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
19398             SDValue NewExtract =
19399                 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
19400                             V.getOperand(0), NewIndex);
19401             return DAG.getBitcast(NVT, NewExtract);
19402           }
19403           if (NewExtEC == 1 &&
19404               TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
19405             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
19406             SDValue NewExtract =
19407                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
19408                             V.getOperand(0), NewIndex);
19409             return DAG.getBitcast(NVT, NewExtract);
19410           }
19411         }
19412       }
19413     }
19414   }
19415 
19416   if (V.getOpcode() == ISD::CONCAT_VECTORS) {
19417     unsigned ExtNumElts = NVT.getVectorMinNumElements();
19418     EVT ConcatSrcVT = V.getOperand(0).getValueType();
19419     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
19420            "Concat and extract subvector do not change element type");
19421     assert((ExtIdx % ExtNumElts) == 0 &&
19422            "Extract index is not a multiple of the input vector length.");
19423 
19424     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
19425     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
19426 
19427     // If the concatenated source types match this extract, it's a direct
19428     // simplification:
19429     // extract_subvec (concat V1, V2, ...), i --> Vi
19430     if (ConcatSrcNumElts == ExtNumElts)
19431       return V.getOperand(ConcatOpIdx);
19432 
19433     // If the concatenated source vectors are a multiple length of this extract,
19434     // then extract a fraction of one of those source vectors directly from a
19435     // concat operand. Example:
    //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
    //   v2i8 extract_subvec v8i8 Y, 6
19438     if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
19439       SDLoc DL(N);
19440       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
19441       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
19442              "Trying to extract from >1 concat operand?");
19443       assert(NewExtIdx % ExtNumElts == 0 &&
19444              "Extract index is not a multiple of the input vector length.");
19445       SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
19446       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
19447                          V.getOperand(ConcatOpIdx), NewIndexC);
19448     }
19449   }
19450 
19451   V = peekThroughBitcasts(V);
19452 
  // If the input is a build vector, try to make a smaller build vector.
19454   if (V.getOpcode() == ISD::BUILD_VECTOR) {
19455     EVT InVT = V.getValueType();
19456     unsigned ExtractSize = NVT.getSizeInBits();
19457     unsigned EltSize = InVT.getScalarSizeInBits();
19458     // Only do this if we won't split any elements.
19459     if (ExtractSize % EltSize == 0) {
19460       unsigned NumElems = ExtractSize / EltSize;
19461       EVT EltVT = InVT.getVectorElementType();
19462       EVT ExtractVT =
19463           NumElems == 1 ? EltVT
19464                         : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
19465       if ((Level < AfterLegalizeDAG ||
19466            (NumElems == 1 ||
19467             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
19468           (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
19469         unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
19470 
19471         if (NumElems == 1) {
19472           SDValue Src = V->getOperand(IdxVal);
19473           if (EltVT != Src.getValueType())
19474             Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
19475           return DAG.getBitcast(NVT, Src);
19476         }
19477 
19478         // Extract the pieces from the original build_vector.
19479         SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
19480                                               V->ops().slice(IdxVal, NumElems));
19481         return DAG.getBitcast(NVT, BuildVec);
19482       }
19483     }
19484   }
19485 
19486   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
19487     // Handle only simple case where vector being inserted and vector
19488     // being extracted are of same size.
19489     EVT SmallVT = V.getOperand(1).getValueType();
19490     if (!NVT.bitsEq(SmallVT))
19491       return SDValue();
19492 
19493     // Combine:
19494     //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
19495     // Into:
19496     //    indices are equal or bit offsets are equal => V1
19497     //    otherwise => (extract_subvec V1, ExtIdx)
19498     uint64_t InsIdx = V.getConstantOperandVal(2);
19499     if (InsIdx * SmallVT.getScalarSizeInBits() ==
19500         ExtIdx * NVT.getScalarSizeInBits())
19501       return DAG.getBitcast(NVT, V.getOperand(1));
19502     return DAG.getNode(
19503         ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
19504         DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
19505         N->getOperand(1));
19506   }
19507 
19508   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
19509     return NarrowBOp;
19510 
19511   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
19512     return SDValue(N, 0);
19513 
19514   return SDValue();
19515 }
19516 
19517 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
19518 /// followed by concatenation. Narrow vector ops may have better performance
19519 /// than wide ops, and this can unlock further narrowing of other vector ops.
19520 /// Targets can invert this transform later if it is not profitable.
19521 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
19522                                          SelectionDAG &DAG) {
19523   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
19524   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
19525       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
19526       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
19527     return SDValue();
19528 
19529   // Split the wide shuffle mask into halves. Any mask element that is accessing
19530   // operand 1 is offset down to account for narrowing of the vectors.
19531   ArrayRef<int> Mask = Shuf->getMask();
19532   EVT VT = Shuf->getValueType(0);
19533   unsigned NumElts = VT.getVectorNumElements();
19534   unsigned HalfNumElts = NumElts / 2;
19535   SmallVector<int, 16> Mask0(HalfNumElts, -1);
19536   SmallVector<int, 16> Mask1(HalfNumElts, -1);
19537   for (unsigned i = 0; i != NumElts; ++i) {
19538     if (Mask[i] == -1)
19539       continue;
19540     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
19541     if (i < HalfNumElts)
19542       Mask0[i] = M;
19543     else
19544       Mask1[i - HalfNumElts] = M;
19545   }
19546 
19547   // Ask the target if this is a valid transform.
19548   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19549   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
19550                                 HalfNumElts);
19551   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
19552       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
19553     return SDValue();
19554 
19555   // shuffle (concat X, undef), (concat Y, undef), Mask -->
19556   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
19557   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
19558   SDLoc DL(Shuf);
19559   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
19560   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
19561   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
19562 }
19563 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into simpler shuffle then concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
  ArrayRef<int> Mask = SVN->getMask();

  SmallVector<SDValue, 4> Ops;
  // All operands of a CONCAT_VECTORS have the same type, so the first operand
  // gives the width of one concatenated piece.
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  auto IsUndefMaskElt = [](int i) { return i == -1; };

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
                   IsUndefMaskElt)) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
                              N0.getOperand(1),
                              Mask.slice(0, NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector
  for (unsigned I = 0; I != NumConcats; ++I) {
    unsigned Begin = I * NumElemsPerConcat;
    ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);

    // An all-undef chunk becomes an undef operand of the new concat.
    if (llvm::all_of(SubMask, IsUndefMaskElt)) {
      Ops.push_back(DAG.getUNDEF(ConcatVT));
      continue;
    }

    // Verify the chunk copies a whole concat operand unpermuted: every defined
    // mask element must sit in its natural lane (SubMask[i] % width == i), and
    // all defined elements must come from the same concat operand (OpIdx).
    int OpIdx = -1;
    for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
      if (IsUndefMaskElt(SubMask[i]))
        continue;
      if ((SubMask[i] % (int)NumElemsPerConcat) != i)
        return SDValue();
      int EltOpIdx = SubMask[i] / NumElemsPerConcat;
      if (0 <= OpIdx && EltOpIdx != OpIdx)
        return SDValue();
      OpIdx = EltOpIdx;
    }
    assert(0 <= OpIdx && "Unknown concat_vectors op");

    // OpIdx counts operands across both source concats: low indices select
    // operands of N0, the remainder select operands of N1.
    if (OpIdx < (int)N0.getNumOperands())
      Ops.push_back(N0.getOperand(OpIdx));
    else
      Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
19628 
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
//
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
// a simplification in some sense, but it isn't appropriate in general: some
// BUILD_VECTORs are substantially cheaper than others. The general case
// of a BUILD_VECTOR requires inserting each element individually (or
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
// all constants is a single constant pool load.  A BUILD_VECTOR where each
// element is identical is a splat.  A BUILD_VECTOR where most of the operands
// are undef lowers to a small number of element insertions.
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  // Replacing a multi-use source with a (possibly expensive) BUILD_VECTOR
  // would not remove the original node, so require a single use.
  if (!N0->hasOneUse())
    return SDValue();

  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    if (!N1->hasOneUse())
      return SDValue();

    bool N0AnyConst = isAnyConstantBuildVector(N0);
    bool N1AnyConst = isAnyConstantBuildVector(N1);
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // If both inputs are splats of the same value then we can safely merge this
  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
  bool IsSplat = false;
  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
  if (BV0 && BV1)
    if (SDValue Splat0 = BV0->getSplatValue())
      IsSplat = (Splat0 == BV1->getSplatValue());

  // Collect one scalar operand per result element, tracking duplicated
  // non-constant scalars so we can bail out per the heuristic above.
  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    // Undef mask elements produce undef scalars.
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      // Mask values < NumElts select from N0; the rest select from N1.
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        // Only element 0 of a SCALAR_TO_VECTOR holds a defined value.
        SDValue Op0 = S.getOperand(0);
        Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
    // generating a splat; semantically, this is fine, but it's likely to
    // generate low-quality code if the target can't reconstruct an appropriate
    // shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!IsSplat && !DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }

  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
19721 
19722 // Match shuffles that can be converted to any_vector_extend_in_reg.
19723 // This is often generated during legalization.
19724 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
19725 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
19726 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
19727                                             SelectionDAG &DAG,
19728                                             const TargetLowering &TLI,
19729                                             bool LegalOperations) {
19730   EVT VT = SVN->getValueType(0);
19731   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
19732 
19733   // TODO Add support for big-endian when we have a test case.
19734   if (!VT.isInteger() || IsBigEndian)
19735     return SDValue();
19736 
19737   unsigned NumElts = VT.getVectorNumElements();
19738   unsigned EltSizeInBits = VT.getScalarSizeInBits();
19739   ArrayRef<int> Mask = SVN->getMask();
19740   SDValue N0 = SVN->getOperand(0);
19741 
19742   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
19743   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
19744     for (unsigned i = 0; i != NumElts; ++i) {
19745       if (Mask[i] < 0)
19746         continue;
19747       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
19748         continue;
19749       return false;
19750     }
19751     return true;
19752   };
19753 
19754   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
19755   // power-of-2 extensions as they are the most likely.
19756   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
19757     // Check for non power of 2 vector sizes
19758     if (NumElts % Scale != 0)
19759       continue;
19760     if (!isAnyExtend(Scale))
19761       continue;
19762 
19763     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
19764     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
19765     // Never create an illegal type. Only create unsupported operations if we
19766     // are pre-legalization.
19767     if (TLI.isTypeLegal(OutVT))
19768       if (!LegalOperations ||
19769           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
19770         return DAG.getBitcast(VT,
19771                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
19772                                           SDLoc(SVN), OutVT, N0));
19773   }
19774 
19775   return SDValue();
19776 }
19777 
19778 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
19779 // each source element of a large type into the lowest elements of a smaller
19780 // destination type. This is often generated during legalization.
19781 // If the source node itself was a '*_extend_vector_inreg' node then we should
19782 // then be able to remove it.
19783 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
19784                                         SelectionDAG &DAG) {
19785   EVT VT = SVN->getValueType(0);
19786   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
19787 
19788   // TODO Add support for big-endian when we have a test case.
19789   if (!VT.isInteger() || IsBigEndian)
19790     return SDValue();
19791 
19792   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
19793 
19794   unsigned Opcode = N0.getOpcode();
19795   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
19796       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
19797       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
19798     return SDValue();
19799 
19800   SDValue N00 = N0.getOperand(0);
19801   ArrayRef<int> Mask = SVN->getMask();
19802   unsigned NumElts = VT.getVectorNumElements();
19803   unsigned EltSizeInBits = VT.getScalarSizeInBits();
19804   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
19805   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
19806 
19807   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
19808     return SDValue();
19809   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
19810 
19811   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
19812   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
19813   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
19814   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
19815     for (unsigned i = 0; i != NumElts; ++i) {
19816       if (Mask[i] < 0)
19817         continue;
19818       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
19819         continue;
19820       return false;
19821     }
19822     return true;
19823   };
19824 
19825   // At the moment we just handle the case where we've truncated back to the
19826   // same size as before the extension.
19827   // TODO: handle more extension/truncation cases as cases arise.
19828   if (EltSizeInBits != ExtSrcSizeInBits)
19829     return SDValue();
19830 
19831   // We can remove *extend_vector_inreg only if the truncation happens at
19832   // the same scale as the extension.
19833   if (isTruncate(ExtScale))
19834     return DAG.getBitcast(VT, N00);
19835 
19836   return SDValue();
19837 }
19838 
19839 // Combine shuffles of splat-shuffles of the form:
19840 // shuffle (shuffle V, undef, splat-mask), undef, M
19841 // If splat-mask contains undef elements, we need to be careful about
19842 // introducing undef's in the folded mask which are not the result of composing
19843 // the masks of the shuffles.
19844 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
19845                                         SelectionDAG &DAG) {
19846   if (!Shuf->getOperand(1).isUndef())
19847     return SDValue();
19848   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
19849   if (!Splat || !Splat->isSplat())
19850     return SDValue();
19851 
19852   ArrayRef<int> ShufMask = Shuf->getMask();
19853   ArrayRef<int> SplatMask = Splat->getMask();
19854   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
19855 
19856   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
19857   // every undef mask element in the splat-shuffle has a corresponding undef
19858   // element in the user-shuffle's mask or if the composition of mask elements
19859   // would result in undef.
19860   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
19861   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
19862   //   In this case it is not legal to simplify to the splat-shuffle because we
19863   //   may be exposing the users of the shuffle an undef element at index 1
19864   //   which was not there before the combine.
19865   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
19866   //   In this case the composition of masks yields SplatMask, so it's ok to
19867   //   simplify to the splat-shuffle.
19868   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
19869   //   In this case the composed mask includes all undef elements of SplatMask
19870   //   and in addition sets element zero to undef. It is safe to simplify to
19871   //   the splat-shuffle.
19872   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
19873                                        ArrayRef<int> SplatMask) {
19874     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
19875       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
19876           SplatMask[UserMask[i]] != -1)
19877         return false;
19878     return true;
19879   };
19880   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
19881     return Shuf->getOperand(0);
19882 
19883   // Create a new shuffle with a mask that is composed of the two shuffles'
19884   // masks.
19885   SmallVector<int, 32> NewMask;
19886   for (int Idx : ShufMask)
19887     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
19888 
19889   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
19890                               Splat->getOperand(0), Splat->getOperand(1),
19891                               NewMask);
19892 }
19893 
19894 /// Combine shuffle of shuffle of the form:
19895 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
19896 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
19897                                      SelectionDAG &DAG) {
19898   if (!OuterShuf->getOperand(1).isUndef())
19899     return SDValue();
19900   auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
19901   if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
19902     return SDValue();
19903 
19904   ArrayRef<int> OuterMask = OuterShuf->getMask();
19905   ArrayRef<int> InnerMask = InnerShuf->getMask();
19906   unsigned NumElts = OuterMask.size();
19907   assert(NumElts == InnerMask.size() && "Mask length mismatch");
19908   SmallVector<int, 32> CombinedMask(NumElts, -1);
19909   int SplatIndex = -1;
19910   for (unsigned i = 0; i != NumElts; ++i) {
19911     // Undef lanes remain undef.
19912     int OuterMaskElt = OuterMask[i];
19913     if (OuterMaskElt == -1)
19914       continue;
19915 
19916     // Peek through the shuffle masks to get the underlying source element.
19917     int InnerMaskElt = InnerMask[OuterMaskElt];
19918     if (InnerMaskElt == -1)
19919       continue;
19920 
19921     // Initialize the splatted element.
19922     if (SplatIndex == -1)
19923       SplatIndex = InnerMaskElt;
19924 
19925     // Non-matching index - this is not a splat.
19926     if (SplatIndex != InnerMaskElt)
19927       return SDValue();
19928 
19929     CombinedMask[i] = InnerMaskElt;
19930   }
19931   assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
19932           getSplatIndex(CombinedMask) != -1) &&
19933          "Expected a splat mask");
19934 
19935   // TODO: The transform may be a win even if the mask is not legal.
19936   EVT VT = OuterShuf->getValueType(0);
19937   assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
19938   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
19939     return SDValue();
19940 
19941   return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
19942                               InnerShuf->getOperand(1), CombinedMask);
19943 }
19944 
19945 /// If the shuffle mask is taking exactly one element from the first vector
19946 /// operand and passing through all other elements from the second vector
19947 /// operand, return the index of the mask element that is choosing an element
19948 /// from the first operand. Otherwise, return -1.
19949 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
19950   int MaskSize = Mask.size();
19951   int EltFromOp0 = -1;
19952   // TODO: This does not match if there are undef elements in the shuffle mask.
19953   // Should we ignore undefs in the shuffle mask instead? The trade-off is
19954   // removing an instruction (a shuffle), but losing the knowledge that some
19955   // vector lanes are not needed.
19956   for (int i = 0; i != MaskSize; ++i) {
19957     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
19958       // We're looking for a shuffle of exactly one element from operand 0.
19959       if (EltFromOp0 != -1)
19960         return -1;
19961       EltFromOp0 = i;
19962     } else if (Mask[i] != i + MaskSize) {
19963       // Nothing from operand 1 can change lanes.
19964       return -1;
19965     }
19966   }
19967   return EltFromOp0;
19968 }
19969 
19970 /// If a shuffle inserts exactly one element from a source vector operand into
19971 /// another vector operand and we can access the specified element as a scalar,
19972 /// then we can eliminate the shuffle.
19973 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
19974                                       SelectionDAG &DAG) {
19975   // First, check if we are taking one element of a vector and shuffling that
19976   // element into another vector.
19977   ArrayRef<int> Mask = Shuf->getMask();
19978   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
19979   SDValue Op0 = Shuf->getOperand(0);
19980   SDValue Op1 = Shuf->getOperand(1);
19981   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
19982   if (ShufOp0Index == -1) {
19983     // Commute mask and check again.
19984     ShuffleVectorSDNode::commuteMask(CommutedMask);
19985     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
19986     if (ShufOp0Index == -1)
19987       return SDValue();
19988     // Commute operands to match the commuted shuffle mask.
19989     std::swap(Op0, Op1);
19990     Mask = CommutedMask;
19991   }
19992 
19993   // The shuffle inserts exactly one element from operand 0 into operand 1.
19994   // Now see if we can access that element as a scalar via a real insert element
19995   // instruction.
19996   // TODO: We can try harder to locate the element as a scalar. Examples: it
19997   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
19998   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
19999          "Shuffle mask value must be from operand 0");
20000   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
20001     return SDValue();
20002 
20003   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
20004   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
20005     return SDValue();
20006 
20007   // There's an existing insertelement with constant insertion index, so we
20008   // don't need to check the legality/profitability of a replacement operation
20009   // that differs at most in the constant value. The target should be able to
20010   // lower any of those in a similar way. If not, legalization will expand this
20011   // to a scalar-to-vector plus shuffle.
20012   //
20013   // Note that the shuffle may move the scalar from the position that the insert
20014   // element used. Therefore, our new insert element occurs at the shuffle's
20015   // mask index value, not the insert's index value.
20016   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
20017   SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
20018   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
20019                      Op1, Op0.getOperand(1), NewInsIndex);
20020 }
20021 
20022 /// If we have a unary shuffle of a shuffle, see if it can be folded away
20023 /// completely. This has the potential to lose undef knowledge because the first
20024 /// shuffle may not have an undef mask element where the second one does. So
20025 /// only call this after doing simplifications based on demanded elements.
20026 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
20027   // shuf (shuf0 X, Y, Mask0), undef, Mask
20028   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20029   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
20030     return SDValue();
20031 
20032   ArrayRef<int> Mask = Shuf->getMask();
20033   ArrayRef<int> Mask0 = Shuf0->getMask();
20034   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
20035     // Ignore undef elements.
20036     if (Mask[i] == -1)
20037       continue;
20038     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
20039 
20040     // Is the element of the shuffle operand chosen by this shuffle the same as
20041     // the element chosen by the shuffle operand itself?
20042     if (Mask0[Mask[i]] != Mask0[i])
20043       return SDValue();
20044   }
20045   // Every element of this shuffle is identical to the result of the previous
20046   // shuffle, so we can replace this value.
20047   return Shuf->getOperand(0);
20048 }
20049 
/// Combine a VECTOR_SHUFFLE node: canonicalize the operand/mask form, fold
/// shuffles of splats, build_vectors and concats, and merge chains of
/// shuffles into a single shuffle when the combined mask is legal.
/// NOTE: the folds below are order-dependent; each one relies on the
/// canonicalizations performed before it (e.g. undef operands normalized).
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      // Remap indices that refer to the (identical) second operand onto the
      // first operand.
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      // Any index selecting from the undef rhs produces an undef lane.
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
    return InsElt;

  // A shuffle of a single vector that is a splatted value can always be folded.
  if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
    return V;

  if (SDValue V = formSplatFromShuffles(SVN, DAG))
    return V;

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  // (SplatIndex < NumElts means the splatted element is taken from N0.)
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    int SplatIndex = SVN->getSplatIndex();
    if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
        TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
      // splat (vector_bo L, R), Index -->
      // splat (scalar_bo (extelt L, Index), (extelt R, Index))
      SDValue L = N0.getOperand(0), R = N0.getOperand(1);
      SDLoc DL(N);
      EVT EltVT = VT.getScalarType();
      SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
      SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
      SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
      SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
                                  N0.getNode()->getFlags());
      SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
      SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
      return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
    }

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    SDNode *V = N0.getNode();
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      // Find the first non-undef element of the build_vector, if any.
      SDValue Base;
      bool AllSame = true;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      SDValue Splatted = V->getOperand(SplatIndex);
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // Simplify source operands based on shuffle mask.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  // This is intentionally placed after demanded elements simplification because
  // it could eliminate knowledge of undef elements created by this shuffle.
  if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
    return ShufOp;

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  // Try to split a shuffle of concats into shuffles of the (matching-type)
  // concat operands.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // A shuffle of a concat of the same narrow vector can be reduced to use
  // only low-half elements of a concat with undef:
  // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
      N0.getNumOperands() == 2 &&
      N0.getOperand(0) == N0.getOperand(1)) {
    int HalfNumElts = (int)NumElts / 2;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      // Indices into the (duplicated) high half are remapped to the low half.
      if (Idx >= HalfNumElts) {
        assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
        Idx -= HalfNumElts;
      }
      NewMask.push_back(Idx);
    }
    if (TLI.isShuffleMaskLegal(NewMask, VT)) {
      SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
      SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
                                   N0.getOperand(0), UndefVec);
      return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
    }
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {

    SDValue BC0 = peekThroughOneUseBitcasts(N0);
    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      // Both scalar sizes must divide evenly into the common smaller type so
      // the masks can be expressed at that granularity.
      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask;
        SmallVector<int, 8> OuterMask;
        narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
        narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSV->isSplat())
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
    return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
  }

  if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
    return V;

  return SDValue();
}
20413 
/// Combine a SCALAR_TO_VECTOR node, primarily by turning
/// SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V, C0)) into a shuffle of V.
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE and possible truncate.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      VT.isFixedLengthVector() &&
      InVal->getOperand(0).getValueType().isFixedLengthVector()) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);
    auto InVecT = InVec.getValueType();
    // Only a constant extract index can be expressed as a shuffle mask.
    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      // Lane 0 takes the extracted element; all other lanes remain undef.
      NewMask[0] = Elt;
      // If we have an implicit truncate, do the truncate here as long as the
      // result scalar type is legal. (If it is not legal, the scalar-type
      // equality check below is expected to reject the combine —
      // NOTE(review): the original comment here was left unfinished; confirm.)
      if (VT.getScalarType() != InVal.getValueType() &&
          InVal.getValueType().isScalarInteger() &&
          isTypeLegal(VT.getScalarType())) {
        SDValue Val =
            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
      }
      if (VT.getScalarType() == InVecT.getScalarType() &&
          VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
        SDValue LegalShuffle =
          TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
                                      DAG.getUNDEF(InVecT), NewMask, DAG);
        if (LegalShuffle) {
          // If the initial vector is the correct size this shuffle is a
          // valid result.
          if (VT == InVecT)
            return LegalShuffle;
          // If not we must truncate the vector.
          if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
            SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
            EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
                                         InVecT.getVectorElementType(),
                                         VT.getVectorNumElements());
            return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
                               LegalShuffle, ZeroIdx);
          }
        }
      }
    }
  }

  return SDValue();
}
20465 
/// Combine an INSERT_SUBVECTOR node: fold inserts of undef/extracted values,
/// push bitcasts through, merge/reorder chained inserts, and fold inserts
/// into concat_vectors.
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  uint64_t InsIdx = N->getConstantOperandVal(2);

  // If inserting an UNDEF, just return the original vector.
  if (N1.isUndef())
    return N0;

  // If this is an insert of an extracted vector into an undef vector, we can
  // just use the input to the extract.
  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
    return N1.getOperand(0);

  // If we are inserting a bitcast value into an undef, with the same
  // number of elements, just use the bitcast input of the extract.
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(0).getOperand(1) == N2 &&
      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
          VT.getVectorNumElements() &&
      N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
          VT.getSizeInBits()) {
    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
  }

  // If both N1 and N2 are bitcast values on which insert_subvector
  // would makes sense, pull the bitcast through.
  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
    SDValue CN0 = N0.getOperand(0);
    SDValue CN1 = N1.getOperand(0);
    EVT CN0VT = CN0.getValueType();
    EVT CN1VT = CN1.getValueType();
    // Both pre-bitcast values must be vectors of the same element type, and
    // the outer one must keep the same element count, so the insert index
    // stays meaningful.
    if (CN0VT.isVector() && CN1VT.isVector() &&
        CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
        CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
                                      CN0.getValueType(), CN0, CN1, N2);
      return DAG.getBitcast(VT, NewINSERT);
    }
  }

  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N0.getOperand(1).getValueType() == N1.getValueType() &&
      N0.getOperand(2) == N2)
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
                       N1, N2);

  // Eliminate an intermediate insert into an undef vector:
  // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
  // insert_subvector undef, X, N2
  if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
                       N1.getOperand(1), N2);

  // Push subvector bitcasts to the output, adjusting the index as we go.
  // insert_subvector(bitcast(v), bitcast(s), c1)
  // -> bitcast(insert_subvector(v, s, c2))
  if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
      N1.getOpcode() == ISD::BITCAST) {
    SDValue N0Src = peekThroughBitcasts(N0);
    SDValue N1Src = peekThroughBitcasts(N1);
    EVT N0SrcSVT = N0Src.getValueType().getScalarType();
    EVT N1SrcSVT = N1Src.getValueType().getScalarType();
    if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
        N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
      EVT NewVT;
      SDLoc DL(N);
      SDValue NewIdx;
      LLVMContext &Ctx = *DAG.getContext();
      unsigned NumElts = VT.getVectorNumElements();
      unsigned EltSizeInBits = VT.getScalarSizeInBits();
      // Rescale the index to the pre-bitcast element size; the scaling is
      // only valid when the sizes divide evenly.
      if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
        unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
        NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
        NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
      } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
        unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
        if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
          NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
          NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
        }
      }
      if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
        SDValue Res = DAG.getBitcast(NewVT, N0Src);
        Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
        return DAG.getBitcast(VT, Res);
      }
    }
  }

  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, Idx0), Idx1)
  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
      N1.getValueType() == N0.getOperand(1).getValueType()) {
    unsigned OtherIdx = N0.getConstantOperandVal(2);
    if (InsIdx < OtherIdx) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
                                  N0.getOperand(0), N1, N2);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
    }
  }

  // If the input vector is a concatenation, and the insert replaces
  // one of the pieces, we can optimize into a single concat_vectors.
  // NOTE(review): this relies on InsIdx being a multiple of the piece size
  // (Factor), as insert_subvector index rules should guarantee — confirm.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
      N0.getOperand(0).getValueType() == N1.getValueType()) {
    unsigned Factor = N1.getValueType().getVectorNumElements();
    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
    Ops[InsIdx / Factor] = N1;
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }

  // Simplify source operands based on insertion.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
20601 
20602 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
20603   SDValue N0 = N->getOperand(0);
20604 
20605   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
20606   if (N0->getOpcode() == ISD::FP16_TO_FP)
20607     return N0->getOperand(0);
20608 
20609   return SDValue();
20610 }
20611 
20612 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
20613   SDValue N0 = N->getOperand(0);
20614 
20615   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
20616   if (N0->getOpcode() == ISD::AND) {
20617     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
20618     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
20619       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
20620                          N0.getOperand(0));
20621     }
20622   }
20623 
20624   return SDValue();
20625 }
20626 
20627 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
20628   SDValue N0 = N->getOperand(0);
20629   EVT VT = N0.getValueType();
20630   unsigned Opcode = N->getOpcode();
20631 
20632   // VECREDUCE over 1-element vector is just an extract.
20633   if (VT.getVectorNumElements() == 1) {
20634     SDLoc dl(N);
20635     SDValue Res =
20636         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
20637                     DAG.getVectorIdxConstant(0, dl));
20638     if (Res.getValueType() != N->getValueType(0))
20639       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
20640     return Res;
20641   }
20642 
20643   // On an boolean vector an and/or reduction is the same as a umin/umax
20644   // reduction. Convert them if the latter is legal while the former isn't.
20645   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
20646     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
20647         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
20648     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
20649         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
20650         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
20651       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
20652   }
20653 
20654   return SDValue();
20655 }
20656 
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");

  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = peekThroughBitcasts(N->getOperand(1));
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  // The mask operand must be a constant build_vector.
  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  // 'Split' is the number of sub-elements each build_vector element is
  // divided into; shuffle index i picks lane i of LHS and i + NumSubElts
  // picks the corresponding lane of the zero vector.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      // X & undef --> 0 (not undef). So this lane must be converted to choose
      // from the zero constant vector (same as if the element had all 0-bits).
      if (Elt.isUndef()) {
        Indices.push_back(i + NumSubElts);
        continue;
      }

      // Only integer and FP constants contribute known mask bits.
      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      // On big-endian targets sub-element 0 holds the most significant bits.
      if (DAG.getDataLayout().isBigEndian())
        Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
      else
        Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);

      // All-ones keeps the LHS lane; all-zeros selects the zero vector; any
      // mixed bit pattern cannot be expressed as a shuffle lane.
      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  // Try whole elements first (Split == 1), then progressively finer splits
  // down to byte granularity, using the first legal mask found.
  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
20745 
20746 /// If a vector binop is performed on splat values, it may be profitable to
20747 /// extract, scalarize, and insert/splat.
20748 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
20749   SDValue N0 = N->getOperand(0);
20750   SDValue N1 = N->getOperand(1);
20751   unsigned Opcode = N->getOpcode();
20752   EVT VT = N->getValueType(0);
20753   EVT EltVT = VT.getVectorElementType();
20754   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20755 
20756   // TODO: Remove/replace the extract cost check? If the elements are available
20757   //       as scalars, then there may be no extract cost. Should we ask if
20758   //       inserting a scalar back into a vector is cheap instead?
20759   int Index0, Index1;
20760   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
20761   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
20762   if (!Src0 || !Src1 || Index0 != Index1 ||
20763       Src0.getValueType().getVectorElementType() != EltVT ||
20764       Src1.getValueType().getVectorElementType() != EltVT ||
20765       !TLI.isExtractVecEltCheap(VT, Index0) ||
20766       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
20767     return SDValue();
20768 
20769   SDLoc DL(N);
20770   SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
20771   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
20772   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
20773   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
20774 
20775   // If all lanes but 1 are undefined, no need to splat the scalar result.
20776   // TODO: Keep track of undefs and use that info in the general case.
20777   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
20778       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
20779       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
20780     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
20781     // build_vec ..undef, (bo X, Y), undef...
20782     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
20783     Ops[Index0] = ScalarBO;
20784     return DAG.getBuildVector(VT, DL, Ops);
20785   }
20786 
20787   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
20788   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
20789   return DAG.getBuildVector(VT, DL, Ops);
20790 }
20791 
/// Visit a binary vector operation, like ADD.
/// Attempts a series of generic vector-binop combines: constant folding,
/// moving identical unary shuffles after the binop, sinking a splat shuffle
/// past a binop with a uniform constant, narrowing the binop through
/// insert_subvector / concat_vectors, and finally scalarizing a binop of
/// splats. Returns the combined value, or an empty SDValue if no transform
/// applied.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         "SimplifyVBinOp only works on vectors!");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Ops[] = {LHS, RHS};
  EVT VT = N->getValueType(0);
  unsigned Opcode = N->getOpcode();
  SDNodeFlags Flags = N->getFlags();

  // See if we can constant fold the vector operation.
  if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
          Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
    return Fold;

  // Move unary shuffles with identical masks after a vector binop:
  // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
  //   --> shuffle (VBinOp A, B), Undef, Mask
  // This does not require type legality checks because we are creating the
  // same types of operations that are in the original sequence. We do have to
  // restrict ops like integer div that have immediate UB (eg, div-by-zero)
  // though. This code is adapted from the identical transform in instcombine.
  if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
      Opcode != ISD::UREM && Opcode != ISD::SREM &&
      Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
    // Shuf0/Shuf1 may be null; each transform below checks the one(s) it uses.
    auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
    auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
    if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
        LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
        (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
      SDLoc DL(N);
      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
                                     RHS.getOperand(0), Flags);
      SDValue UndefV = LHS.getOperand(1);
      return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
    }

    // Try to sink a splat shuffle after a binop with a uniform constant.
    // This is limited to cases where neither the shuffle nor the constant have
    // undefined elements because that could be poison-unsafe or inhibit
    // demanded elements analysis. It is further limited to not change a splat
    // of an inserted scalar because that may be optimized better by
    // load-folding or other target-specific behaviors.
    if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
        Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
        Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
      // binop (splat X), (splat C) --> splat (binop X, C)
      SDLoc DL(N);
      SDValue X = Shuf0->getOperand(0);
      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
      return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
                                  Shuf0->getMask());
    }
    // Mirror image of the transform above for a constant on the left.
    if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
        Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
        Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
      // binop (splat C), (splat X) --> splat (binop C, X)
      SDLoc DL(N);
      SDValue X = Shuf1->getOperand(0);
      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
      return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
                                  Shuf1->getMask());
    }
  }

  // The following pattern is likely to emerge with vector reduction ops. Moving
  // the binary operation ahead of insertion may allow using a narrower vector
  // instruction that has better performance than the wide version of the op:
  // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
  if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
      RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
      LHS.getOperand(2) == RHS.getOperand(2) &&
      (LHS.hasOneUse() || RHS.hasOneUse())) {
    SDValue X = LHS.getOperand(1);
    SDValue Y = RHS.getOperand(1);
    SDValue Z = LHS.getOperand(2);
    EVT NarrowVT = X.getValueType();
    if (NarrowVT == Y.getValueType() &&
        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
      // (binop undef, undef) may not return undef, so compute that result.
      SDLoc DL(N);
      SDValue VecC =
          DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
      SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
      return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
    }
  }

  // Make sure all but the first op are undef or constant.
  auto ConcatWithConstantOrUndef = [](SDValue Concat) {
    return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
           std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
                     [](const SDValue &Op) {
                       return Op.isUndef() ||
                              ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
                     });
  };

  // The following pattern is likely to emerge with vector reduction ops. Moving
  // the binary operation ahead of the concat may allow using a narrower vector
  // instruction that has better performance than the wide version of the op:
  // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
  //   concat (VBinOp X, Y), VecC
  if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
      (LHS.hasOneUse() || RHS.hasOneUse())) {
    EVT NarrowVT = LHS.getOperand(0).getValueType();
    if (NarrowVT == RHS.getOperand(0).getValueType() &&
        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
      SDLoc DL(N);
      unsigned NumOperands = LHS.getNumOperands();
      SmallVector<SDValue, 4> ConcatOps;
      for (unsigned i = 0; i != NumOperands; ++i) {
        // Operands 1 and up are undef/constant, so those binops constant-fold.
        ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
                                        RHS.getOperand(i)));
      }

      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    }
  }

  if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
    return V;

  return SDValue();
}
20920 
20921 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
20922                                     SDValue N2) {
20923   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
20924 
20925   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
20926                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
20927 
20928   // If we got a simplified select_cc node back from SimplifySelectCC, then
20929   // break it down into a new SETCC node, and a new SELECT node, and then return
20930   // the SELECT node, since we were called with a SELECT node.
20931   if (SCC.getNode()) {
20932     // Check to see if we got a select_cc back (to turn into setcc/select).
20933     // Otherwise, just return whatever node we got back, like fabs.
20934     if (SCC.getOpcode() == ISD::SELECT_CC) {
20935       const SDNodeFlags Flags = N0.getNode()->getFlags();
20936       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
20937                                   N0.getValueType(),
20938                                   SCC.getOperand(0), SCC.getOperand(1),
20939                                   SCC.getOperand(4), Flags);
20940       AddToWorklist(SETCC.getNode());
20941       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
20942                                          SCC.getOperand(2), SCC.getOperand(3));
20943       SelectNode->setFlags(Flags);
20944       return SelectNode;
20945     }
20946 
20947     return SCC;
20948   }
20949   return SDValue();
20950 }
20951 
/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
/// being selected between, see if we can simplify the select.  Callers of this
/// should assume that TheSelect is deleted if this returns true.  As such, they
/// should return the appropriate thing (e.g. the node) back to the top-level of
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {
  // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
  // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
    if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
      SDValue Sqrt = RHS;
      ISD::CondCode CC;
      SDValue CmpLHS;
      const ConstantFPSDNode *Zero = nullptr;

      // Pull the comparison out of either the SELECT_CC operands or the SETCC
      // feeding a SELECT/VSELECT condition. If the condition is not a SETCC,
      // Zero stays null and we bail out below.
      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
        CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
        CmpLHS = TheSelect->getOperand(0);
        Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
      } else {
        // SELECT or VSELECT
        SDValue Cmp = TheSelect->getOperand(0);
        if (Cmp.getOpcode() == ISD::SETCC) {
          CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
          CmpLHS = Cmp.getOperand(0);
          Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
        }
      }
      // Only 'x < [+-]0.0' style compares of the fsqrt operand qualify.
      if (Zero && Zero->isZero() &&
          Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
          CC == ISD::SETULT || CC == ISD::SETLT)) {
        // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
        CombineTo(TheSelect, Sqrt);
        return true;
      }
    }
  }
  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        // Be conservative for atomics for the moment
        // TODO: This does appear to be legal for unordered atomics (see D66309)
        !LLD->isSimple() || !RLD->isSimple() ||
        // FIXME: If either is a pre/post inc/dec load,
        // we'd need to split out the address adjustment.
        LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information.  This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations.  Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0 ||
        // We can't produce a CMOV of a TargetFrameIndex since we won't
        // generate the address generation required.
        LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
        RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
                                      LLD->getBasePtr().getValueType()))
      return false;

    // The loads must not depend on one another.
    if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
      return false;

    // Check that the select condition doesn't reach either load.  If so,
    // folding this will induce a cycle into the DAG.  If not, this is safe to
    // xform, so create a select of the addresses.

    SmallPtrSet<const SDNode *, 32> Visited;
    SmallVector<const SDNode *, 16> Worklist;

    // Always fail if LLD and RLD are not independent. TheSelect is a
    // predecessor to all Nodes in question so we need not search past it.

    Visited.insert(TheSelect);
    Worklist.push_back(LLD);
    Worklist.push_back(RLD);

    if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
        SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
      return false;

    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      // We cannot do this optimization if any pair of {RLD, LLD} is a
      // predecessor to {RLD, LLD, CondNode}. As we've already compared the
      // Loads, we only need to check if CondNode is a successor to one of the
      // loads. We can further avoid this if there's no use of their chain
      // value.
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      Worklist.push_back(CondNode);

      if ((LLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
          (RLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
        return false;

      Addr = DAG.getSelect(SDLoc(TheSelect),
                           LLD->getBasePtr().getValueType(),
                           TheSelect->getOperand(0), LLD->getBasePtr(),
                           RLD->getBasePtr());
    } else {  // Otherwise SELECT_CC
      // We cannot do this optimization if any pair of {RLD, LLD} is a
      // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
      // the Loads, we only need to check if CondLHS/CondRHS is a successor to
      // one of the loads. We can further avoid this if there's no use of their
      // chain value.

      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();
      Worklist.push_back(CondLHS);
      Worklist.push_back(CondRHS);

      if ((LLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
          (RLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
        return false;

      Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must be the minimum (most restrictive) alignment of the
    // inputs.
    unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
    // Start from the left load's MMO flags and drop any guarantee the right
    // load does not also provide.
    MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
    if (!RLD->isInvariant())
      MMOFlags &= ~MachineMemOperand::MOInvariant;
    if (!RLD->isDereferenceable())
      MMOFlags &= ~MachineMemOperand::MODereferenceable;
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      // FIXME: Discards pointer and AA info.
      Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
                         LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
                         MMOFlags);
    } else {
      // FIXME: Discards pointer and AA info.
      Load = DAG.getExtLoad(
          LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
                                                  : LLD->getExtensionType(),
          SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
          MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain.  We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}
21143 
21144 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
21145 /// bitwise 'and'.
21146 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
21147                                             SDValue N1, SDValue N2, SDValue N3,
21148                                             ISD::CondCode CC) {
21149   // If this is a select where the false operand is zero and the compare is a
21150   // check of the sign bit, see if we can perform the "gzip trick":
21151   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
21152   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
21153   EVT XType = N0.getValueType();
21154   EVT AType = N2.getValueType();
21155   if (!isNullConstant(N3) || !XType.bitsGE(AType))
21156     return SDValue();
21157 
21158   // If the comparison is testing for a positive value, we have to invert
21159   // the sign bit mask, so only do that transform if the target has a bitwise
21160   // 'and not' instruction (the invert is free).
21161   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
21162     // (X > -1) ? A : 0
21163     // (X >  0) ? X : 0 <-- This is canonical signed max.
21164     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
21165       return SDValue();
21166   } else if (CC == ISD::SETLT) {
21167     // (X <  0) ? A : 0
21168     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
21169     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
21170       return SDValue();
21171   } else {
21172     return SDValue();
21173   }
21174 
21175   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
21176   // constant.
21177   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
21178   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
21179   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
21180     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
21181     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
21182       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
21183       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
21184       AddToWorklist(Shift.getNode());
21185 
21186       if (XType.bitsGT(AType)) {
21187         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
21188         AddToWorklist(Shift.getNode());
21189       }
21190 
21191       if (CC == ISD::SETGT)
21192         Shift = DAG.getNOT(DL, Shift, AType);
21193 
21194       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
21195     }
21196   }
21197 
21198   unsigned ShCt = XType.getSizeInBits() - 1;
21199   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
21200     return SDValue();
21201 
21202   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
21203   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
21204   AddToWorklist(Shift.getNode());
21205 
21206   if (XType.bitsGT(AType)) {
21207     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
21208     AddToWorklist(Shift.getNode());
21209   }
21210 
21211   if (CC == ISD::SETGT)
21212     Shift = DAG.getNOT(DL, Shift, AType);
21213 
21214   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
21215 }
21216 
21217 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
21218 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
21219 /// in it. This may be a win when the constant is not otherwise available
21220 /// because it replaces two constant pool loads with one.
21221 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
21222     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
21223     ISD::CondCode CC) {
21224   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
21225     return SDValue();
21226 
21227   // If we are before legalize types, we want the other legalization to happen
21228   // first (for example, to avoid messing with soft float).
21229   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
21230   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
21231   EVT VT = N2.getValueType();
21232   if (!TV || !FV || !TLI.isTypeLegal(VT))
21233     return SDValue();
21234 
21235   // If a constant can be materialized without loads, this does not make sense.
21236   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
21237       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
21238       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
21239     return SDValue();
21240 
21241   // If both constants have multiple uses, then we won't need to do an extra
21242   // load. The values are likely around in registers for other users.
21243   if (!TV->hasOneUse() && !FV->hasOneUse())
21244     return SDValue();
21245 
21246   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
21247                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
21248   Type *FPTy = Elts[0]->getType();
21249   const DataLayout &TD = DAG.getDataLayout();
21250 
21251   // Create a ConstantArray of the two constants.
21252   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
21253   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
21254                                       TD.getPrefTypeAlign(FPTy));
21255   Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
21256 
21257   // Get offsets to the 0 and 1 elements of the array, so we can select between
21258   // them.
21259   SDValue Zero = DAG.getIntPtrConstant(0, DL);
21260   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
21261   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
21262   SDValue Cond =
21263       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
21264   AddToWorklist(Cond.getNode());
21265   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
21266   AddToWorklist(CstOffset.getNode());
21267   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
21268   AddToWorklist(CPIdx.getNode());
21269   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
21270                      MachinePointerInfo::getConstantPool(
21271                          DAG.getMachineFunction()), Alignment);
21272 }
21273 
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
///
/// Tries, in order: trivial same-operand folding, constant-folding the
/// condition, converting a select of FP constants into a constant-pool load,
/// the shift+and "gzip trick", turning select-of-power-of-2 into a shifted
/// setcc, and recognizing the ctlz/cttz zero-check idiom. Returns the
/// replacement value, or an empty SDValue if nothing applied. If
/// NotExtCompare is set, do not produce a plain zext-of-setcc result.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT CmpOpVT = N0.getValueType();
  EVT CmpResVT = getSetCCResultType(CmpOpVT);
  EVT VT = N2.getValueType();
  auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());

  // Determine if the condition we're dealing with is constant.
  if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
    AddToWorklist(SCC.getNode());
    if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
      // fold select_cc true, x, y -> x
      // fold select_cc false, x, y -> y
      return !(SCCC->isNullValue()) ? N2 : N3;
    }
  }

  if (SDValue V =
          convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
    return V;

  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
    return V;

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y is has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      unsigned ShCt = AndMask.getBitWidth() - 1;
      if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
        SDValue ShlAmt =
          DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                          getShiftAmountTy(AndLHS.getValueType()));
        SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

        // Now arithmetic right shift it all the way over, so the result is
        // either all-ones, or zero.
        SDValue ShrAmt =
          DAG.getConstant(ShCt, SDLoc(Shl),
                          getShiftAmountTy(Shl.getValueType()));
        SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

        return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
      }
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
  bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();

  // This only works when the setcc produces a 0/1 boolean, so that shifting
  // it left by log2(C) yields exactly C or 0.
  if ((Fold || Swap) &&
      TLI.getBooleanContents(CmpOpVT) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {

    // Canonicalize to the 'Fold' form by inverting the condition.
    if (Swap) {
      CC = ISD::getSetCCInverse(CC, CmpOpVT);
      std::swap(N2C, N3C);
    }

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    SDValue Temp, SCC;
    // zext (setcc n0, n1)
    if (LegalTypes) {
      SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
      if (VT.bitsLT(SCC.getValueType()))
        Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
      else
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    } else {
      SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
      Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    }

    AddToWorklist(SCC.getNode());
    AddToWorklist(Temp.getNode());

    if (N2C->isOne())
      return Temp;

    unsigned ShCt = N2C->getAPIntValue().logBase2();
    if (TLI.shouldAvoidTransformToShift(VT, ShCt))
      return SDValue();

    // shl setcc result by log2 n2c
    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
                       DAG.getConstant(ShCt, SDLoc(Temp),
                                       getShiftAmountTy(Temp.getValueType())));
  }

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of E, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  return SDValue();
}
21423 
21424 /// This is a stub for TargetLowering::SimplifySetCC.
21425 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
21426                                    ISD::CondCode Cond, const SDLoc &DL,
21427                                    bool foldBooleans) {
21428   TargetLowering::DAGCombinerInfo
21429     DagCombineInfo(DAG, Level, false, this);
21430   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
21431 }
21432 
21433 /// Given an ISD::SDIV node expressing a divide by constant, return
21434 /// a DAG expression to select that will generate the same value by multiplying
21435 /// by a magic number.
21436 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
21437 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
21438   // when optimising for minimum size, we don't want to expand a div to a mul
21439   // and a shift.
21440   if (DAG.getMachineFunction().getFunction().hasMinSize())
21441     return SDValue();
21442 
21443   SmallVector<SDNode *, 8> Built;
21444   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
21445     for (SDNode *N : Built)
21446       AddToWorklist(N);
21447     return S;
21448   }
21449 
21450   return SDValue();
21451 }
21452 
21453 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
21454 /// DAG expression that will generate the same value by right shifting.
21455 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
21456   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
21457   if (!C)
21458     return SDValue();
21459 
21460   // Avoid division by zero.
21461   if (C->isNullValue())
21462     return SDValue();
21463 
21464   SmallVector<SDNode *, 8> Built;
21465   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
21466     for (SDNode *N : Built)
21467       AddToWorklist(N);
21468     return S;
21469   }
21470 
21471   return SDValue();
21472 }
21473 
21474 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
21475 /// expression that will generate the same value by multiplying by a magic
21476 /// number.
21477 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
21478 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
21479   // when optimising for minimum size, we don't want to expand a div to a mul
21480   // and a shift.
21481   if (DAG.getMachineFunction().getFunction().hasMinSize())
21482     return SDValue();
21483 
21484   SmallVector<SDNode *, 8> Built;
21485   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
21486     for (SDNode *N : Built)
21487       AddToWorklist(N);
21488     return S;
21489   }
21490 
21491   return SDValue();
21492 }
21493 
21494 /// Determines the LogBase2 value for a non-null input value using the
21495 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
21496 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
21497   EVT VT = V.getValueType();
21498   unsigned EltBits = VT.getScalarSizeInBits();
21499   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
21500   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
21501   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
21502   return LogBase2;
21503 }
21504 
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
///   F(X) = A X - 1 [which has a zero at X = 1/A]
///     =>
///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
///     does not require additional intermediate precision]
/// For the last iteration, put numerator N into it to gain more precision:
///   Result = N X_i + X_i (N - N A X_i)
///
/// \param N     numerator of the division being estimated.
/// \param Op    denominator; a target reciprocal estimate of it is refined.
/// \param Flags fast-math flags propagated to every node built here.
/// \returns an estimate of N / Op, or an empty SDValue if estimates are
///          unavailable or disabled for this type/function.
SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
                                      SDNodeFlags Flags) {
  // Creating new nodes is not safe once the whole DAG has been legalized.
  if (LegalDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getDivRefinementSteps(VT, MF);
  if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
    AddToWorklist(Est.getNode());

    SDLoc DL(Op);
    if (Iterations) {
      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);

      // Newton iterations: Est = Est + Est (N - Arg * Est)
      // If this is the last iteration, also multiply by the numerator.
      for (int i = 0; i < Iterations; ++i) {
        SDValue MulEst = Est;

        // Final step: fold the numerator in up front (see formula in the
        // header comment), so MulEst = N * Est.
        if (i == Iterations - 1) {
          MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
          AddToWorklist(MulEst.getNode());
        }

        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
        AddToWorklist(NewEst.getNode());

        // Residual: (1 - Op*Est), or (N - Op*N*Est) on the final step.
        NewEst = DAG.getNode(ISD::FSUB, DL, VT,
                             (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
        AddToWorklist(Est.getNode());
      }
    } else {
      // If no iterations are available, multiply with N.
      Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
      AddToWorklist(Est.getNode());
    }

    return Est;
  }

  return SDValue();
}
21573 
21574 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
21575 /// For the reciprocal sqrt, we need to find the zero of the function:
21576 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
21577 ///     =>
21578 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
21579 /// As a result, we precompute A/2 prior to the iteration loop.
21580 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
21581                                          unsigned Iterations,
21582                                          SDNodeFlags Flags, bool Reciprocal) {
21583   EVT VT = Arg.getValueType();
21584   SDLoc DL(Arg);
21585   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
21586 
21587   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
21588   // this entire sequence requires only one FP constant.
21589   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
21590   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
21591 
21592   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
21593   for (unsigned i = 0; i < Iterations; ++i) {
21594     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
21595     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
21596     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
21597     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
21598   }
21599 
21600   // If non-reciprocal square root is requested, multiply the result by Arg.
21601   if (!Reciprocal)
21602     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
21603 
21604   return Est;
21605 }
21606 
21607 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
21608 /// For the reciprocal sqrt, we need to find the zero of the function:
21609 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
21610 ///     =>
21611 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
21612 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
21613                                          unsigned Iterations,
21614                                          SDNodeFlags Flags, bool Reciprocal) {
21615   EVT VT = Arg.getValueType();
21616   SDLoc DL(Arg);
21617   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
21618   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
21619 
21620   // This routine must enter the loop below to work correctly
21621   // when (Reciprocal == false).
21622   assert(Iterations > 0);
21623 
21624   // Newton iterations for reciprocal square root:
21625   // E = (E * -0.5) * ((A * E) * E + -3.0)
21626   for (unsigned i = 0; i < Iterations; ++i) {
21627     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
21628     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
21629     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
21630 
21631     // When calculating a square root at the last iteration build:
21632     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
21633     // (notice a common subexpression)
21634     SDValue LHS;
21635     if (Reciprocal || (i + 1) < Iterations) {
21636       // RSQRT: LHS = (E * -0.5)
21637       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
21638     } else {
21639       // SQRT: LHS = (A * E) * -0.5
21640       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
21641     }
21642 
21643     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
21644   }
21645 
21646   return Est;
21647 }
21648 
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                           bool Reciprocal) {
  // Creating new nodes is not safe once the whole DAG has been legalized.
  if (LegalDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);

  // The target chooses between the one-constant and two-constant
  // Newton-Raphson refinement sequences via UseOneConstNR.
  bool UseOneConstNR = false;
  if (SDValue Est =
      TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
                          Reciprocal)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      Est = UseOneConstNR
            ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
            : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);

      if (!Reciprocal) {
        // The estimate is now completely wrong if the input was exactly 0.0 or
        // possibly a denormal. Force the answer to 0.0 for those cases.
        SDLoc DL(Op);
        EVT CCVT = getSetCCResultType(VT);
        ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
        DenormalMode DenormMode = DAG.getDenormalMode(VT);
        if (DenormMode.Input == DenormalMode::IEEE) {
          // This is specifically a check for the handling of denormal inputs,
          // not the result.

          // fabs(X) < SmallestNormal ? 0.0 : Est
          const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
          APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
          SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
          SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
          Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
        } else {
          // Denormal inputs are flushed, so only exact zero needs the fixup:
          // X == 0.0 ? 0.0 : Est
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
          Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
        }
      }
    }
    return Est;
  }

  return SDValue();
}
21715 
/// Build an estimate of 1/sqrt(Op) (reciprocal square root).
SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, /*Reciprocal=*/true);
}
21719 
/// Build an estimate of sqrt(Op); see buildSqrtEstimateImpl for the
/// zero/denormal postprocessing this entails.
SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, /*Reciprocal=*/false);
}
21723 
21724 /// Return true if there is any possibility that the two addresses overlap.
21725 bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
21726 
21727   struct MemUseCharacteristics {
21728     bool IsVolatile;
21729     bool IsAtomic;
21730     SDValue BasePtr;
21731     int64_t Offset;
21732     Optional<int64_t> NumBytes;
21733     MachineMemOperand *MMO;
21734   };
21735 
21736   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
21737     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
21738       int64_t Offset = 0;
21739       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
21740         Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
21741                      ? C->getSExtValue()
21742                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
21743                            ? -1 * C->getSExtValue()
21744                            : 0;
21745       uint64_t Size =
21746           MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
21747       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
21748               Offset /*base offset*/,
21749               Optional<int64_t>(Size),
21750               LSN->getMemOperand()};
21751     }
21752     if (const auto *LN = cast<LifetimeSDNode>(N))
21753       return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
21754               (LN->hasOffset()) ? LN->getOffset() : 0,
21755               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
21756                                 : Optional<int64_t>(),
21757               (MachineMemOperand *)nullptr};
21758     // Default.
21759     return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
21760             (int64_t)0 /*offset*/,
21761             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
21762   };
21763 
21764   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
21765                         MUC1 = getCharacteristics(Op1);
21766 
21767   // If they are to the same address, then they must be aliases.
21768   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
21769       MUC0.Offset == MUC1.Offset)
21770     return true;
21771 
21772   // If they are both volatile then they cannot be reordered.
21773   if (MUC0.IsVolatile && MUC1.IsVolatile)
21774     return true;
21775 
21776   // Be conservative about atomics for the moment
21777   // TODO: This is way overconservative for unordered atomics (see D66309)
21778   if (MUC0.IsAtomic && MUC1.IsAtomic)
21779     return true;
21780 
21781   if (MUC0.MMO && MUC1.MMO) {
21782     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
21783         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
21784       return false;
21785   }
21786 
21787   // Try to prove that there is aliasing, or that there is no aliasing. Either
21788   // way, we can return now. If nothing can be proved, proceed with more tests.
21789   bool IsAlias;
21790   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
21791                                        DAG, IsAlias))
21792     return IsAlias;
21793 
21794   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
21795   // either are not known.
21796   if (!MUC0.MMO || !MUC1.MMO)
21797     return true;
21798 
21799   // If one operation reads from invariant memory, and the other may store, they
21800   // cannot alias. These should really be checking the equivalent of mayWrite,
21801   // but it only matters for memory nodes other than load /store.
21802   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
21803       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
21804     return false;
21805 
21806   // If we know required SrcValue1 and SrcValue2 have relatively large
21807   // alignment compared to the size and offset of the access, we may be able
21808   // to prove they do not alias. This check is conservative for now to catch
21809   // cases created by splitting vector types, it only works when the offsets are
21810   // multiples of the size of the data.
21811   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
21812   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
21813   Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
21814   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
21815   auto &Size0 = MUC0.NumBytes;
21816   auto &Size1 = MUC1.NumBytes;
21817   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
21818       Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
21819       OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
21820       SrcValOffset1 % *Size1 == 0) {
21821     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
21822     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
21823 
21824     // There is no overlap between these relatively aligned accesses of
21825     // similar size. Return no alias.
21826     if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
21827       return false;
21828   }
21829 
21830   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
21831                    ? CombinerGlobalAA
21832                    : DAG.getSubtarget().useAA();
21833 #ifndef NDEBUG
21834   if (CombinerAAOnlyFunc.getNumOccurrences() &&
21835       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
21836     UseAA = false;
21837 #endif
21838 
21839   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
21840       Size0.hasValue() && Size1.hasValue()) {
21841     // Use alias analysis information.
21842     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
21843     int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
21844     int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
21845     AliasResult AAResult = AA->alias(
21846         MemoryLocation(MUC0.MMO->getValue(), Overlap0,
21847                        UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
21848         MemoryLocation(MUC1.MMO->getValue(), Overlap1,
21849                        UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
21850     if (AAResult == NoAlias)
21851       return false;
21852   }
21853 
21854   // Otherwise we have to assume they alias.
21855   return true;
21856 }
21857 
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  // TODO: relax aliasing for unordered atomics (see D66309)
  const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Attempt to improve chain by a single step: returns true if C was replaced
  // by the next chain to inspect (or SDValue() when the walk is complete),
  // false if C must be treated as an alias and kept.
  std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
    switch (C.getOpcode()) {
    case ISD::EntryToken:
      // No need to mark EntryToken.
      C = SDValue();
      return true;
    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for C.
      // TODO: Relax aliasing for unordered atomics (see D66309)
      bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
                      cast<LSBaseSDNode>(C.getNode())->isSimple();
      // Two simple loads never conflict, and a proven non-alias is safe to
      // step over.
      if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      // Alias, so stop here.
      return false;
    }

    case ISD::CopyFromReg:
      // Always forward past CopyFromReg.
      C = C.getOperand(0);
      return true;

    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END: {
      // We can forward past any lifetime start/end that can be proven not to
      // alias the memory access.
      if (!isAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      return false;
    }
    default:
      // Conservatively treat every other chain producer as an alias.
      return false;
    }
  };

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // Don't bother if we've seen Chain before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      // Give up: fall back to the original chain as the sole alias.
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    if (Chain.getOpcode() == ISD::TokenFactor) {
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        continue;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      continue;
    }
    // Everything else
    if (ImproveChain(Chain)) {
      // Updated Chain Found, Consider new chain if one exists.
      if (Chain.getNode())
        Chains.push_back(Chain);
      ++Depth;
      continue;
    }
    // No Improved Chain Possible, treat as Alias.
    Aliases.push_back(Chain);
  }
}
21964 
21965 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
21966 /// (aliasing node.)
21967 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
21968   if (OptLevel == CodeGenOpt::None)
21969     return OldChain;
21970 
21971   // Ops for replacing token factor.
21972   SmallVector<SDValue, 8> Aliases;
21973 
21974   // Accumulate all the aliases to this node.
21975   GatherAllAliases(N, OldChain, Aliases);
21976 
21977   // If no operands then chain to entry token.
21978   if (Aliases.size() == 0)
21979     return DAG.getEntryNode();
21980 
21981   // If a single operand then chain to it.  We don't need to revisit it.
21982   if (Aliases.size() == 1)
21983     return Aliases[0];
21984 
21985   // Construct a custom tailored token factor.
21986   return DAG.getTokenFactor(SDLoc(N), Aliases);
21987 }
21988 
namespace {
// TODO: Replace with std::monostate when we move to C++17.
// Empty unit type used as the (ignored) value payload for the IntervalMap in
// parallelizeChainedStores; all instances compare equal so adjacent intervals
// with this value coalesce on insertion.
struct UnitT { } Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
} // namespace
21995 
21996 // This function tries to collect a bunch of potentially interesting
21997 // nodes to improve the chains of, all at once. This might seem
21998 // redundant, as this function gets called when visiting every store
21999 // node, so why not let the work be done on each store as it's visited?
22000 //
22001 // I believe this is mainly important because mergeConsecutiveStores
22002 // is unable to deal with merging stores of different sizes, so unless
22003 // we improve the chains of all the potential candidates up-front
22004 // before running mergeConsecutiveStores, it might only see some of
22005 // the nodes that will eventually be candidates, and then not be able
22006 // to go from a partially-merged state to the desired final
22007 // fully-merged state.
22008 
bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
  SmallVector<StoreSDNode *, 8> ChainedStores;
  StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, every store writes to the immediately previous address
  // space and thus merged with the previous interval at insertion time.

  using IMap =
      llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
  IMap::Allocator A;
  IMap Intervals(A);

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // BaseIndexOffset assumes that offsets are fixed-size, which
  // is not valid for scalable vectors where the offsets are
  // scaled by `vscale`, so bail out early.
  if (St->getMemoryVT().isScalableVector())
    return false;

  // Add ST's interval. (Size in bytes, rounded up for sub-byte types.)
  Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);

  // Walk up the chain collecting disjoint, simple stores off the same base.
  while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (!SDValue(Chain, 0)->hasOneUse())
      break;
    // TODO: Relax for unordered atomics (see D66309)
    if (!Chain->isSimple() || Chain->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
    // Check that the base pointer is the same as the original one.
    int64_t Offset;
    if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
      break;
    int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
    // Make sure we don't overlap with other intervals by checking the ones to
    // the left or right before inserting.
    auto I = Intervals.find(Offset);
    // If there's a next interval, we should end before it.
    if (I != Intervals.end() && I.start() < (Offset + Length))
      break;
    // If there's a previous interval, we should start after it.
    // NOTE(review): by IntervalMap::find semantics the predecessor's stop() is
    // always <= Offset here, so this condition breaks whenever any interval
    // lies wholly below Offset -- effectively only descending store chains get
    // collected. Confirm this conservatism is intended; the comment above
    // reads as though an overlap test (stop() > Offset) was meant.
    if (I != Intervals.begin() && (--I).stop() <= Offset)
      break;
    Intervals.insert(Offset, Offset + Length, Unit);

    ChainedStores.push_back(Chain);
    STChain = Chain;
  }

  // If we didn't find a chained store, exit.
  if (ChainedStores.size() == 0)
    return false;

  // Improve all chained stores (St and ChainedStores members) starting from
  // where the store chain ended and return single TokenFactor.
  SDValue NewChain = STChain->getChain();
  SmallVector<SDValue, 8> TFOps;
  // Process bottom-up so each store is re-chained before its dependents.
  for (unsigned I = ChainedStores.size(); I;) {
    StoreSDNode *S = ChainedStores[--I];
    SDValue BetterChain = FindBetterChain(S, NewChain);
    S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
        S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
    TFOps.push_back(SDValue(S, 0));
    ChainedStores[I] = S;
  }

  // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
  SDValue BetterChain = FindBetterChain(St, NewChain);
  SDValue NewST;
  if (St->isTruncatingStore())
    NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
                              St->getBasePtr(), St->getMemoryVT(),
                              St->getMemOperand());
  else
    NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
                         St->getBasePtr(), St->getMemOperand());

  TFOps.push_back(NewST);

  // If we improved every element of TFOps, then we've lost the dependence on
  // NewChain to successors of St and we need to add it back to TFOps. Do so at
  // the beginning to keep relative order consistent with FindBetterChains.
  auto hasImprovedChain = [&](SDValue ST) -> bool {
    return ST->getOperand(0) != NewChain;
  };
  bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
  if (AddNewChain)
    TFOps.insert(TFOps.begin(), NewChain);

  SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
  CombineTo(St, TF);

  // Add TF and its operands to the worklist.
  AddToWorklist(TF.getNode());
  for (const SDValue &Op : TF->ops())
    AddToWorklist(Op.getNode());
  AddToWorklist(STChain);
  return true;
}
22122 
22123 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
22124   if (OptLevel == CodeGenOpt::None)
22125     return false;
22126 
22127   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
22128 
22129   // We must have a base and an offset.
22130   if (!BasePtr.getBase().getNode())
22131     return false;
22132 
22133   // Do not handle stores to undef base pointers.
22134   if (BasePtr.getBase().isUndef())
22135     return false;
22136 
22137   // Directly improve a chain of disjoint stores starting at St.
22138   if (parallelizeChainedStores(St))
22139     return true;
22140 
22141   // Improve St's Chain..
22142   SDValue BetterChain = FindBetterChain(St, St->getChain());
22143   if (St->getChain() != BetterChain) {
22144     replaceStoreChain(St, BetterChain);
22145     return true;
22146   }
22147   return false;
22148 }
22149 
22150 /// This is the entry point for the file.
22151 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
22152                            CodeGenOpt::Level OptLevel) {
22153   /// This is the main entry point to this class.
22154   DAGCombiner(*this, AA, OptLevel).Run(Level);
22155 }
22156