1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Analysis/AliasAnalysis.h"
32 #include "llvm/Analysis/MemoryLocation.h"
33 #include "llvm/CodeGen/DAGCombine.h"
34 #include "llvm/CodeGen/ISDOpcodes.h"
35 #include "llvm/CodeGen/MachineFrameInfo.h"
36 #include "llvm/CodeGen/MachineFunction.h"
37 #include "llvm/CodeGen/MachineMemOperand.h"
38 #include "llvm/CodeGen/RuntimeLibcalls.h"
39 #include "llvm/CodeGen/SelectionDAG.h"
40 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
41 #include "llvm/CodeGen/SelectionDAGNodes.h"
42 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
43 #include "llvm/CodeGen/TargetLowering.h"
44 #include "llvm/CodeGen/TargetRegisterInfo.h"
45 #include "llvm/CodeGen/TargetSubtargetInfo.h"
46 #include "llvm/CodeGen/ValueTypes.h"
47 #include "llvm/IR/Attributes.h"
48 #include "llvm/IR/Constant.h"
49 #include "llvm/IR/DataLayout.h"
50 #include "llvm/IR/DerivedTypes.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/LLVMContext.h"
53 #include "llvm/IR/Metadata.h"
54 #include "llvm/Support/Casting.h"
55 #include "llvm/Support/CodeGen.h"
56 #include "llvm/Support/CommandLine.h"
57 #include "llvm/Support/Compiler.h"
58 #include "llvm/Support/Debug.h"
59 #include "llvm/Support/ErrorHandling.h"
60 #include "llvm/Support/KnownBits.h"
61 #include "llvm/Support/MachineValueType.h"
62 #include "llvm/Support/MathExtras.h"
63 #include "llvm/Support/raw_ostream.h"
64 #include "llvm/Target/TargetMachine.h"
65 #include "llvm/Target/TargetOptions.h"
66 #include <algorithm>
67 #include <cassert>
68 #include <cstdint>
69 #include <functional>
70 #include <iterator>
71 #include <string>
72 #include <tuple>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 #define DEBUG_TYPE "dagcombine"
78 
// Statistics about combiner activity, reported with -stats.
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

// Command-line knobs controlling the combiner's alias analysis behavior.
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
// Debug-only knob: restrict combiner alias analysis to a single function.
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));

static cl::opt<bool>
    EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
                       cl::desc("DAG combiner enable merging multiple stores "
                                "into a wider store"));

// Guards against pathological compile time when flattening token factors.
static cl::opt<unsigned> TokenFactorInlineLimit(
    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
    cl::desc("Limit the number of operands to inline for Token Factors"));

// Guards against repeatedly re-checking the same failing store-merge
// candidate pair; see DAGCombiner::StoreRootCountMap.
static cl::opt<unsigned> StoreMergeDependenceLimit(
    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
    cl::desc("Limit the number of times for the same StoreNode and RootNode "
             "to bail out in store merging dependence check"));
126 
127 namespace {
128 
129   class DAGCombiner {
130     SelectionDAG &DAG;
131     const TargetLowering &TLI;
132     CombineLevel Level;
133     CodeGenOpt::Level OptLevel;
134     bool LegalDAG = false;
135     bool LegalOperations = false;
136     bool LegalTypes = false;
137     bool ForCodeSize;
138 
139     /// Worklist of all of the nodes that need to be simplified.
140     ///
141     /// This must behave as a stack -- new nodes to process are pushed onto the
142     /// back and when processing we pop off of the back.
143     ///
144     /// The worklist will not contain duplicates but may contain null entries
145     /// due to nodes being deleted from the underlying DAG.
146     SmallVector<SDNode *, 64> Worklist;
147 
148     /// Mapping from an SDNode to its position on the worklist.
149     ///
150     /// This is used to find and remove nodes from the worklist (by nulling
151     /// them) when they are deleted from the underlying DAG. It relies on
152     /// stable indices of nodes within the worklist.
153     DenseMap<SDNode *, unsigned> WorklistMap;
154     /// This records all nodes attempted to add to the worklist since we
155     /// considered a new worklist entry. As we keep do not add duplicate nodes
156     /// in the worklist, this is different from the tail of the worklist.
157     SmallSetVector<SDNode *, 32> PruningList;
158 
159     /// Set of nodes which have been combined (at least once).
160     ///
161     /// This is used to allow us to reliably add any operands of a DAG node
162     /// which have not yet been combined to the worklist.
163     SmallPtrSet<SDNode *, 32> CombinedNodes;
164 
165     /// Map from candidate StoreNode to the pair of RootNode and count.
166     /// The count is used to track how many times we have seen the StoreNode
167     /// with the same RootNode bail out in dependence check. If we have seen
168     /// the bail out for the same pair many times over a limit, we won't
169     /// consider the StoreNode with the same RootNode as store merging
170     /// candidate again.
171     DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
172 
173     // AA - Used for DAG load/store alias analysis.
174     AliasAnalysis *AA;
175 
176     /// When an instruction is simplified, add all users of the instruction to
177     /// the work lists because they might get more simplified now.
178     void AddUsersToWorklist(SDNode *N) {
179       for (SDNode *Node : N->uses())
180         AddToWorklist(Node);
181     }
182 
183     /// Convenient shorthand to add a node and all of its user to the worklist.
184     void AddToWorklistWithUsers(SDNode *N) {
185       AddUsersToWorklist(N);
186       AddToWorklist(N);
187     }
188 
189     // Prune potentially dangling nodes. This is called after
190     // any visit to a node, but should also be called during a visit after any
191     // failed combine which may have created a DAG node.
192     void clearAddedDanglingWorklistEntries() {
193       // Check any nodes added to the worklist to see if they are prunable.
194       while (!PruningList.empty()) {
195         auto *N = PruningList.pop_back_val();
196         if (N->use_empty())
197           recursivelyDeleteUnusedNodes(N);
198       }
199     }
200 
201     SDNode *getNextWorklistEntry() {
202       // Before we do any work, remove nodes that are not in use.
203       clearAddedDanglingWorklistEntries();
204       SDNode *N = nullptr;
205       // The Worklist holds the SDNodes in order, but it may contain null
206       // entries.
207       while (!N && !Worklist.empty()) {
208         N = Worklist.pop_back_val();
209       }
210 
211       if (N) {
212         bool GoodWorklistEntry = WorklistMap.erase(N);
213         (void)GoodWorklistEntry;
214         assert(GoodWorklistEntry &&
215                "Found a worklist entry without a corresponding map entry!");
216       }
217       return N;
218     }
219 
220     /// Call the node-specific routine that folds each particular type of node.
221     SDValue visit(SDNode *N);
222 
223   public:
224     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
225         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
226           OptLevel(OL), AA(AA) {
227       ForCodeSize = DAG.shouldOptForSize();
228 
229       MaximumLegalStoreInBits = 0;
230       // We use the minimum store size here, since that's all we can guarantee
231       // for the scalable vector types.
232       for (MVT VT : MVT::all_valuetypes())
233         if (EVT(VT).isSimple() && VT != MVT::Other &&
234             TLI.isTypeLegal(EVT(VT)) &&
235             VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
236           MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
237     }
238 
239     void ConsiderForPruning(SDNode *N) {
240       // Mark this for potential pruning.
241       PruningList.insert(N);
242     }
243 
244     /// Add to the worklist making sure its instance is at the back (next to be
245     /// processed.)
246     void AddToWorklist(SDNode *N) {
247       assert(N->getOpcode() != ISD::DELETED_NODE &&
248              "Deleted Node added to Worklist");
249 
250       // Skip handle nodes as they can't usefully be combined and confuse the
251       // zero-use deletion strategy.
252       if (N->getOpcode() == ISD::HANDLENODE)
253         return;
254 
255       ConsiderForPruning(N);
256 
257       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
258         Worklist.push_back(N);
259     }
260 
261     /// Remove all instances of N from the worklist.
262     void removeFromWorklist(SDNode *N) {
263       CombinedNodes.erase(N);
264       PruningList.remove(N);
265       StoreRootCountMap.erase(N);
266 
267       auto It = WorklistMap.find(N);
268       if (It == WorklistMap.end())
269         return; // Not in the worklist.
270 
271       // Null out the entry rather than erasing it to avoid a linear operation.
272       Worklist[It->second] = nullptr;
273       WorklistMap.erase(It);
274     }
275 
276     void deleteAndRecombine(SDNode *N);
277     bool recursivelyDeleteUnusedNodes(SDNode *N);
278 
279     /// Replaces all uses of the results of one DAG node with new values.
280     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
281                       bool AddTo = true);
282 
283     /// Replaces all uses of the results of one DAG node with new values.
284     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
285       return CombineTo(N, &Res, 1, AddTo);
286     }
287 
288     /// Replaces all uses of the results of one DAG node with new values.
289     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
290                       bool AddTo = true) {
291       SDValue To[] = { Res0, Res1 };
292       return CombineTo(N, To, 2, AddTo);
293     }
294 
295     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
296 
297   private:
298     unsigned MaximumLegalStoreInBits;
299 
300     /// Check the specified integer node value to see if it can be simplified or
301     /// if things it uses can be simplified by bit propagation.
302     /// If so, return true.
303     bool SimplifyDemandedBits(SDValue Op) {
304       unsigned BitWidth = Op.getScalarValueSizeInBits();
305       APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
306       return SimplifyDemandedBits(Op, DemandedBits);
307     }
308 
309     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
310       EVT VT = Op.getValueType();
311       unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
312       APInt DemandedElts = APInt::getAllOnesValue(NumElts);
313       return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
314     }
315 
316     /// Check the specified vector node value to see if it can be simplified or
317     /// if things it uses can be simplified as it only uses some of the
318     /// elements. If so, return true.
319     bool SimplifyDemandedVectorElts(SDValue Op) {
320       unsigned NumElts = Op.getValueType().getVectorNumElements();
321       APInt DemandedElts = APInt::getAllOnesValue(NumElts);
322       return SimplifyDemandedVectorElts(Op, DemandedElts);
323     }
324 
325     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
326                               const APInt &DemandedElts);
327     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
328                                     bool AssumeSingleUse = false);
329 
330     bool CombineToPreIndexedLoadStore(SDNode *N);
331     bool CombineToPostIndexedLoadStore(SDNode *N);
332     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
333     bool SliceUpLoad(SDNode *N);
334 
335     // Scalars have size 0 to distinguish from singleton vectors.
336     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
337     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
338     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
339 
340     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
341     ///   load.
342     ///
343     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
344     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
345     /// \param EltNo index of the vector element to load.
346     /// \param OriginalLoad load that EVE came from to be replaced.
347     /// \returns EVE on success SDValue() on failure.
348     SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
349                                          SDValue EltNo,
350                                          LoadSDNode *OriginalLoad);
351     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
352     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
353     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
354     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
355     SDValue PromoteIntBinOp(SDValue Op);
356     SDValue PromoteIntShiftOp(SDValue Op);
357     SDValue PromoteExtend(SDValue Op);
358     bool PromoteLoad(SDValue Op);
359 
360     /// Call the node-specific routine that knows how to fold each
361     /// particular type of node. If that doesn't do anything, try the
362     /// target-specific DAG combines.
363     SDValue combine(SDNode *N);
364 
365     // Visitation implementation - Implement dag node combining for different
366     // node types.  The semantics are as follows:
367     // Return Value:
368     //   SDValue.getNode() == 0 - No change was made
369     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
370     //   otherwise              - N should be replaced by the returned Operand.
371     //
372     SDValue visitTokenFactor(SDNode *N);
373     SDValue visitMERGE_VALUES(SDNode *N);
374     SDValue visitADD(SDNode *N);
375     SDValue visitADDLike(SDNode *N);
376     SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
377     SDValue visitSUB(SDNode *N);
378     SDValue visitADDSAT(SDNode *N);
379     SDValue visitSUBSAT(SDNode *N);
380     SDValue visitADDC(SDNode *N);
381     SDValue visitADDO(SDNode *N);
382     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
383     SDValue visitSUBC(SDNode *N);
384     SDValue visitSUBO(SDNode *N);
385     SDValue visitADDE(SDNode *N);
386     SDValue visitADDCARRY(SDNode *N);
387     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
388     SDValue visitSUBE(SDNode *N);
389     SDValue visitSUBCARRY(SDNode *N);
390     SDValue visitMUL(SDNode *N);
391     SDValue visitMULFIX(SDNode *N);
392     SDValue useDivRem(SDNode *N);
393     SDValue visitSDIV(SDNode *N);
394     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
395     SDValue visitUDIV(SDNode *N);
396     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
397     SDValue visitREM(SDNode *N);
398     SDValue visitMULHU(SDNode *N);
399     SDValue visitMULHS(SDNode *N);
400     SDValue visitSMUL_LOHI(SDNode *N);
401     SDValue visitUMUL_LOHI(SDNode *N);
402     SDValue visitMULO(SDNode *N);
403     SDValue visitIMINMAX(SDNode *N);
404     SDValue visitAND(SDNode *N);
405     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
406     SDValue visitOR(SDNode *N);
407     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
408     SDValue visitXOR(SDNode *N);
409     SDValue SimplifyVBinOp(SDNode *N);
410     SDValue visitSHL(SDNode *N);
411     SDValue visitSRA(SDNode *N);
412     SDValue visitSRL(SDNode *N);
413     SDValue visitFunnelShift(SDNode *N);
414     SDValue visitRotate(SDNode *N);
415     SDValue visitABS(SDNode *N);
416     SDValue visitBSWAP(SDNode *N);
417     SDValue visitBITREVERSE(SDNode *N);
418     SDValue visitCTLZ(SDNode *N);
419     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
420     SDValue visitCTTZ(SDNode *N);
421     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
422     SDValue visitCTPOP(SDNode *N);
423     SDValue visitSELECT(SDNode *N);
424     SDValue visitVSELECT(SDNode *N);
425     SDValue visitSELECT_CC(SDNode *N);
426     SDValue visitSETCC(SDNode *N);
427     SDValue visitSETCCCARRY(SDNode *N);
428     SDValue visitSIGN_EXTEND(SDNode *N);
429     SDValue visitZERO_EXTEND(SDNode *N);
430     SDValue visitANY_EXTEND(SDNode *N);
431     SDValue visitAssertExt(SDNode *N);
432     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
433     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
434     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
435     SDValue visitTRUNCATE(SDNode *N);
436     SDValue visitBITCAST(SDNode *N);
437     SDValue visitBUILD_PAIR(SDNode *N);
438     SDValue visitFADD(SDNode *N);
439     SDValue visitFSUB(SDNode *N);
440     SDValue visitFMUL(SDNode *N);
441     SDValue visitFMA(SDNode *N);
442     SDValue visitFDIV(SDNode *N);
443     SDValue visitFREM(SDNode *N);
444     SDValue visitFSQRT(SDNode *N);
445     SDValue visitFCOPYSIGN(SDNode *N);
446     SDValue visitFPOW(SDNode *N);
447     SDValue visitSINT_TO_FP(SDNode *N);
448     SDValue visitUINT_TO_FP(SDNode *N);
449     SDValue visitFP_TO_SINT(SDNode *N);
450     SDValue visitFP_TO_UINT(SDNode *N);
451     SDValue visitFP_ROUND(SDNode *N);
452     SDValue visitFP_EXTEND(SDNode *N);
453     SDValue visitFNEG(SDNode *N);
454     SDValue visitFABS(SDNode *N);
455     SDValue visitFCEIL(SDNode *N);
456     SDValue visitFTRUNC(SDNode *N);
457     SDValue visitFFLOOR(SDNode *N);
458     SDValue visitFMINNUM(SDNode *N);
459     SDValue visitFMAXNUM(SDNode *N);
460     SDValue visitFMINIMUM(SDNode *N);
461     SDValue visitFMAXIMUM(SDNode *N);
462     SDValue visitBRCOND(SDNode *N);
463     SDValue visitBR_CC(SDNode *N);
464     SDValue visitLOAD(SDNode *N);
465 
466     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
467     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
468 
469     SDValue visitSTORE(SDNode *N);
470     SDValue visitLIFETIME_END(SDNode *N);
471     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
472     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
473     SDValue visitBUILD_VECTOR(SDNode *N);
474     SDValue visitCONCAT_VECTORS(SDNode *N);
475     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
476     SDValue visitVECTOR_SHUFFLE(SDNode *N);
477     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
478     SDValue visitINSERT_SUBVECTOR(SDNode *N);
479     SDValue visitMLOAD(SDNode *N);
480     SDValue visitMSTORE(SDNode *N);
481     SDValue visitMGATHER(SDNode *N);
482     SDValue visitMSCATTER(SDNode *N);
483     SDValue visitFP_TO_FP16(SDNode *N);
484     SDValue visitFP16_TO_FP(SDNode *N);
485     SDValue visitVECREDUCE(SDNode *N);
486 
487     SDValue visitFADDForFMACombine(SDNode *N);
488     SDValue visitFSUBForFMACombine(SDNode *N);
489     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
490 
491     SDValue XformToShuffleWithZero(SDNode *N);
492     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
493                                                     const SDLoc &DL, SDValue N0,
494                                                     SDValue N1);
495     SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
496                                       SDValue N1);
497     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
498                            SDValue N1, SDNodeFlags Flags);
499 
500     SDValue visitShiftByConstant(SDNode *N);
501 
502     SDValue foldSelectOfConstants(SDNode *N);
503     SDValue foldVSelectOfConstants(SDNode *N);
504     SDValue foldBinOpIntoSelect(SDNode *BO);
505     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
506     SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
507     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
508     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
509                              SDValue N2, SDValue N3, ISD::CondCode CC,
510                              bool NotExtCompare = false);
511     SDValue convertSelectOfFPConstantsToLoadOffset(
512         const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
513         ISD::CondCode CC);
514     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
515                                    SDValue N2, SDValue N3, ISD::CondCode CC);
516     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
517                               const SDLoc &DL);
518     SDValue unfoldMaskedMerge(SDNode *N);
519     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
520     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
521                           const SDLoc &DL, bool foldBooleans);
522     SDValue rebuildSetCC(SDValue N);
523 
524     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
525                            SDValue &CC, bool MatchStrict = false) const;
526     bool isOneUseSetCC(SDValue N) const;
527     bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
528 
529     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
530                                          unsigned HiOp);
531     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
532     SDValue CombineExtLoad(SDNode *N);
533     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
534     SDValue combineRepeatedFPDivisors(SDNode *N);
535     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
536     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
537     SDValue BuildSDIV(SDNode *N);
538     SDValue BuildSDIVPow2(SDNode *N);
539     SDValue BuildUDIV(SDNode *N);
540     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
541     SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
542     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
543     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
544     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
545     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
546                                 SDNodeFlags Flags, bool Reciprocal);
547     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
548                                 SDNodeFlags Flags, bool Reciprocal);
549     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
550                                bool DemandHighBits = true);
551     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
552     SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
553                               SDValue InnerPos, SDValue InnerNeg,
554                               unsigned PosOpcode, unsigned NegOpcode,
555                               const SDLoc &DL);
556     SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
557     SDValue MatchLoadCombine(SDNode *N);
558     SDValue MatchStoreCombine(StoreSDNode *N);
559     SDValue ReduceLoadWidth(SDNode *N);
560     SDValue ReduceLoadOpStoreWidth(SDNode *N);
561     SDValue splitMergedValStore(StoreSDNode *ST);
562     SDValue TransformFPLoadStorePair(SDNode *N);
563     SDValue convertBuildVecZextToZext(SDNode *N);
564     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
565     SDValue reduceBuildVecTruncToBitCast(SDNode *N);
566     SDValue reduceBuildVecToShuffle(SDNode *N);
567     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
568                                   ArrayRef<int> VectorMask, SDValue VecIn1,
569                                   SDValue VecIn2, unsigned LeftIdx,
570                                   bool DidSplitVec);
571     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
572 
573     /// Walk up chain skipping non-aliasing memory nodes,
574     /// looking for aliasing nodes and adding them to the Aliases vector.
575     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
576                           SmallVectorImpl<SDValue> &Aliases);
577 
578     /// Return true if there is any possibility that the two addresses overlap.
579     bool isAlias(SDNode *Op0, SDNode *Op1) const;
580 
581     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
582     /// chain (aliasing node.)
583     SDValue FindBetterChain(SDNode *N, SDValue Chain);
584 
585     /// Try to replace a store and any possibly adjacent stores on
586     /// consecutive chains with better chains. Return true only if St is
587     /// replaced.
588     ///
589     /// Notice that other chains may still be replaced even if the function
590     /// returns false.
591     bool findBetterNeighborChains(StoreSDNode *St);
592 
593     // Helper for findBetterNeighborChains. Walk up store chain add additional
594     // chained stores that do not overlap and can be parallelized.
595     bool parallelizeChainedStores(StoreSDNode *St);
596 
597     /// Holds a pointer to an LSBaseSDNode as well as information on where it
598     /// is located in a sequence of memory operations connected by a chain.
599     struct MemOpLink {
600       // Ptr to the mem node.
601       LSBaseSDNode *MemNode;
602 
603       // Offset from the base ptr.
604       int64_t OffsetFromBase;
605 
606       MemOpLink(LSBaseSDNode *N, int64_t Offset)
607           : MemNode(N), OffsetFromBase(Offset) {}
608     };
609 
610     /// This is a helper function for visitMUL to check the profitability
611     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
612     /// MulNode is the original multiply, AddNode is (add x, c1),
613     /// and ConstNode is c2.
614     bool isMulAddWithConstProfitable(SDNode *MulNode,
615                                      SDValue &AddNode,
616                                      SDValue &ConstNode);
617 
618     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
619     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
620     /// the type of the loaded value to be extended.
621     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
622                           EVT LoadResultTy, EVT &ExtVT);
623 
624     /// Helper function to calculate whether the given Load/Store can have its
625     /// width reduced to ExtVT.
626     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
627                            EVT &MemVT, unsigned ShAmt = 0);
628 
629     /// Used by BackwardsPropagateMask to find suitable loads.
630     bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
631                            SmallPtrSetImpl<SDNode*> &NodesWithConsts,
632                            ConstantSDNode *Mask, SDNode *&NodeToMask);
633     /// Attempt to propagate a given AND node back to load leaves so that they
634     /// can be combined into narrow loads.
635     bool BackwardsPropagateMask(SDNode *N);
636 
637     /// Helper function for MergeConsecutiveStores which merges the
638     /// component store chains.
639     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
640                                 unsigned NumStores);
641 
642     /// This is a helper function for MergeConsecutiveStores. When the
643     /// source elements of the consecutive stores are all constants or
644     /// all extracted vector elements, try to merge them into one
645     /// larger store introducing bitcasts if necessary.  \return True
646     /// if a merged store was created.
647     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
648                                          EVT MemVT, unsigned NumStores,
649                                          bool IsConstantSrc, bool UseVector,
650                                          bool UseTrunc);
651 
652     /// This is a helper function for MergeConsecutiveStores. Stores
653     /// that potentially may be merged with St are placed in
654     /// StoreNodes. RootNode is a chain predecessor to all store
655     /// candidates.
656     void getStoreMergeCandidates(StoreSDNode *St,
657                                  SmallVectorImpl<MemOpLink> &StoreNodes,
658                                  SDNode *&Root);
659 
660     /// Helper function for MergeConsecutiveStores. Checks if
661     /// candidate stores have indirect dependency through their
662     /// operands. RootNode is the predecessor to all stores calculated
663     /// by getStoreMergeCandidates and is used to prune the dependency check.
664     /// \return True if safe to merge.
665     bool checkMergeStoreCandidatesForDependencies(
666         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
667         SDNode *RootNode);
668 
669     /// Merge consecutive store operations into a wide store.
670     /// This optimization uses wide integers or vectors when possible.
671     /// \return number of stores that were merged into a merged store (the
672     /// affected nodes are stored as a prefix in \p StoreNodes).
673     bool MergeConsecutiveStores(StoreSDNode *St);
674 
675     /// Try to transform a truncation where C is a constant:
676     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
677     ///
678     /// \p N needs to be a truncation and its first operand an AND. Other
679     /// requirements are checked by the function (e.g. that trunc is
680     /// single-use) and if missed an empty SDValue is returned.
681     SDValue distributeTruncateThroughAnd(SDNode *N);
682 
683     /// Helper function to determine whether the target supports operation
684     /// given by \p Opcode for type \p VT, that is, whether the operation
685     /// is legal or custom before legalizing operations, and whether is
686     /// legal (but not custom) after legalization.
687     bool hasOperation(unsigned Opcode, EVT VT) {
688       if (LegalOperations)
689         return TLI.isOperationLegal(Opcode, VT);
690       return TLI.isOperationLegalOrCustom(Opcode, VT);
691     }
692 
693   public:
694     /// Runs the dag combiner on all nodes in the work list
695     void Run(CombineLevel AtLevel);
696 
697     SelectionDAG &getDAG() const { return DAG; }
698 
699     /// Returns a type large enough to hold any valid shift amount - before type
700     /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      // Delegate to the target; LegalTypes tells it whether type
      // legalization has already run (set by Run()).
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }
705 
706     /// This method returns true if we are running before type legalization or
707     /// if the specified VT is legal.
708     bool isTypeLegal(const EVT &VT) {
709       if (!LegalTypes) return true;
710       return TLI.isTypeLegal(VT);
711     }
712 
    /// Convenience wrapper around TargetLowering::getSetCCResultType:
    /// returns the type the target uses for the boolean result of a
    /// setcc on \p VT.
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }
717 
718     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
719                          SDValue OrigLoad, SDValue ExtLoad,
720                          ISD::NodeType ExtType);
721   };
722 
723 /// This class is a DAGUpdateListener that removes any deleted
724 /// nodes from the worklist.
725 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
726   DAGCombiner &DC;
727 
728 public:
729   explicit WorklistRemover(DAGCombiner &dc)
730     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
731 
732   void NodeDeleted(SDNode *N, SDNode *E) override {
733     DC.removeFromWorklist(N);
734   }
735 };
736 
737 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
738   DAGCombiner &DC;
739 
740 public:
741   explicit WorklistInserter(DAGCombiner &dc)
742       : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
743 
744   // FIXME: Ideally we could add N to the worklist, but this causes exponential
745   //        compile time costs in large DAGs, e.g. Halide.
746   void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
747 };
748 
749 } // end anonymous namespace
750 
751 //===----------------------------------------------------------------------===//
752 //  TargetLowering::DAGCombinerInfo implementation
753 //===----------------------------------------------------------------------===//
754 
755 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
756   ((DAGCombiner*)DC)->AddToWorklist(N);
757 }
758 
759 SDValue TargetLowering::DAGCombinerInfo::
760 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
761   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
762 }
763 
764 SDValue TargetLowering::DAGCombinerInfo::
765 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
766   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
767 }
768 
769 SDValue TargetLowering::DAGCombinerInfo::
770 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
771   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
772 }
773 
774 bool TargetLowering::DAGCombinerInfo::
775 recursivelyDeleteUnusedNodes(SDNode *N) {
776   return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
777 }
778 
779 void TargetLowering::DAGCombinerInfo::
780 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
781   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
782 }
783 
784 //===----------------------------------------------------------------------===//
785 // Helper Functions
786 //===----------------------------------------------------------------------===//
787 
788 void DAGCombiner::deleteAndRecombine(SDNode *N) {
789   removeFromWorklist(N);
790 
791   // If the operands of this node are only used by the node, they will now be
792   // dead. Make sure to re-visit them and recursively delete dead nodes.
793   for (const SDValue &Op : N->ops())
794     // For an operand generating multiple values, one of the values may
795     // become dead allowing further simplification (e.g. split index
796     // arithmetic from an indexed load).
797     if (Op->hasOneUse() || Op->getNumValues() > 1)
798       AddToWorklist(Op.getNode());
799 
800   DAG.DeleteNode(N);
801 }
802 
803 // APInts must be the same size for most operations, this helper
804 // function zero extends the shorter of the pair so that they match.
805 // We provide an Offset so that we can create bitwidths that won't overflow.
806 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
807   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
808   LHS = LHS.zextOrSelf(Bits);
809   RHS = RHS.zextOrSelf(Bits);
810 }
811 
812 // Return true if this node is a setcc, or is a select_cc
813 // that selects between the target values used for true and false, making it
814 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
815 // the appropriate nodes based on the type of node we are checking. This
816 // simplifies life a bit for the callers.
817 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
818                                     SDValue &CC, bool MatchStrict) const {
819   if (N.getOpcode() == ISD::SETCC) {
820     LHS = N.getOperand(0);
821     RHS = N.getOperand(1);
822     CC  = N.getOperand(2);
823     return true;
824   }
825 
826   if (MatchStrict &&
827       (N.getOpcode() == ISD::STRICT_FSETCC ||
828        N.getOpcode() == ISD::STRICT_FSETCCS)) {
829     LHS = N.getOperand(1);
830     RHS = N.getOperand(2);
831     CC  = N.getOperand(3);
832     return true;
833   }
834 
835   if (N.getOpcode() != ISD::SELECT_CC ||
836       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
837       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
838     return false;
839 
840   if (TLI.getBooleanContents(N.getValueType()) ==
841       TargetLowering::UndefinedBooleanContent)
842     return false;
843 
844   LHS = N.getOperand(0);
845   RHS = N.getOperand(1);
846   CC  = N.getOperand(4);
847   return true;
848 }
849 
850 /// Return true if this is a SetCC-equivalent operation with only one use.
851 /// If this is true, it allows the users to invert the operation for free when
852 /// it is profitable to do so.
853 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
854   SDValue N0, N1, N2;
855   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
856     return true;
857   return false;
858 }
859 
860 // Returns the SDNode if it is a constant float BuildVector
861 // or constant float.
862 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
863   if (isa<ConstantFPSDNode>(N))
864     return N.getNode();
865   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
866     return N.getNode();
867   return nullptr;
868 }
869 
870 // Determines if it is a constant integer or a build vector of constant
871 // integers (and undefs).
872 // Do not permit build vector implicit truncation.
873 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
874   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
875     return !(Const->isOpaque() && NoOpaques);
876   if (N.getOpcode() != ISD::BUILD_VECTOR)
877     return false;
878   unsigned BitWidth = N.getScalarValueSizeInBits();
879   for (const SDValue &Op : N->op_values()) {
880     if (Op.isUndef())
881       continue;
882     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
883     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
884         (Const->isOpaque() && NoOpaques))
885       return false;
886   }
887   return true;
888 }
889 
890 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
891 // undef's.
892 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
893   if (V.getOpcode() != ISD::BUILD_VECTOR)
894     return false;
895   return isConstantOrConstantVector(V, NoOpaques) ||
896          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
897 }
898 
bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                             const SDLoc &DL,
                                                             SDValue N0,
                                                             SDValue N1) {
  // Currently this only tries to ensure we don't undo the GEP splits done by
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
  // we check if the following transformation would be problematic:
  // (load/store (add, (add, x, offset1), offset2)) ->
  // (load/store (add, x, offset1+offset2)).

  // Only the (add (add x, c1), c2) shape is of interest here.
  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
    return false;

  // With a single use there is no other user of (add x, offset1) whose
  // addressing-mode match (checked in the loop below) could be broken.
  if (N0.hasOneUse())
    return false;

  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  auto *C2 = dyn_cast<ConstantSDNode>(N1);
  if (!C1 || !C2)
    return false;

  const APInt &C1APIntVal = C1->getAPIntValue();
  const APInt &C2APIntVal = C2->getAPIntValue();
  // Offsets wider than 64 bits cannot be expressed in AddrMode::BaseOffs.
  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
    return false;

  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
  if (CombinedValueIntVal.getBitWidth() > 64)
    return false;
  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

  // Check every memory user of the inner add: if a load/store can fold
  // offset2 today but could not fold offset1+offset2, reassociating would
  // break its addressing mode.
  for (SDNode *Node : N0->uses()) {
    auto LoadStore = dyn_cast<MemSDNode>(Node);
    if (LoadStore) {
      // Is x[offset2] already not a legal addressing mode? If so then
      // reassociating the constants breaks nothing (we test offset2 because
      // that's the one we hope to fold into the load or store).
      TargetLoweringBase::AddrMode AM;
      AM.HasBaseReg = true;
      AM.BaseOffs = C2APIntVal.getSExtValue();
      EVT VT = LoadStore->getMemoryVT();
      unsigned AS = LoadStore->getAddressSpace();
      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        continue;

      // Would x[offset1+offset2] still be a legal addressing mode?
      AM.BaseOffs = CombinedValue;
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        return true;
    }
  }

  return false;
}
954 
955 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
956 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
957 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
958                                                SDValue N0, SDValue N1) {
959   EVT VT = N0.getValueType();
960 
961   if (N0.getOpcode() != Opc)
962     return SDValue();
963 
964   // Don't reassociate reductions.
965   if (N0->getFlags().hasVectorReduction())
966     return SDValue();
967 
968   if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
969     if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
970       // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
971       if (SDValue OpNode =
972               DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
973         return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
974       return SDValue();
975     }
976     if (N0.hasOneUse()) {
977       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
978       //              iff (op x, c1) has one use
979       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
980       if (!OpNode.getNode())
981         return SDValue();
982       return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
983     }
984   }
985   return SDValue();
986 }
987 
988 // Try to reassociate commutative binops.
989 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
990                                     SDValue N1, SDNodeFlags Flags) {
991   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
992   // Don't reassociate reductions.
993   if (Flags.hasVectorReduction())
994     return SDValue();
995 
996   // Floating-point reassociation is not allowed without loose FP math.
997   if (N0.getValueType().isFloatingPoint() ||
998       N1.getValueType().isFloatingPoint())
999     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1000       return SDValue();
1001 
1002   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1003     return Combined;
1004   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1005     return Combined;
1006   return SDValue();
1007 }
1008 
1009 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1010                                bool AddTo) {
1011   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1012   ++NodesCombined;
1013   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1014              To[0].getNode()->dump(&DAG);
1015              dbgs() << " and " << NumTo - 1 << " other values\n");
1016   for (unsigned i = 0, e = NumTo; i != e; ++i)
1017     assert((!To[i].getNode() ||
1018             N->getValueType(i) == To[i].getValueType()) &&
1019            "Cannot combine value to value of different type!");
1020 
1021   WorklistRemover DeadNodes(*this);
1022   DAG.ReplaceAllUsesWith(N, To);
1023   if (AddTo) {
1024     // Push the new nodes and any users onto the worklist
1025     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1026       if (To[i].getNode()) {
1027         AddToWorklist(To[i].getNode());
1028         AddUsersToWorklist(To[i].getNode());
1029       }
1030     }
1031   }
1032 
1033   // Finally, if the node is now dead, remove it from the graph.  The node
1034   // may not be dead if the replacement process recursively simplified to
1035   // something else needing this node.
1036   if (N->use_empty())
1037     deleteAndRecombine(N);
1038   return SDValue(N, 0);
1039 }
1040 
1041 void DAGCombiner::
1042 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1043   // Replace all uses.  If any nodes become isomorphic to other nodes and
1044   // are deleted, make sure to remove them from our worklist.
1045   WorklistRemover DeadNodes(*this);
1046   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1047 
1048   // Push the new node and any (possibly new) users onto the worklist.
1049   AddToWorklistWithUsers(TLO.New.getNode());
1050 
1051   // Finally, if the node is now dead, remove it from the graph.  The node
1052   // may not be dead if the replacement process recursively simplified to
1053   // something else needing this node.
1054   if (TLO.Old.getNode()->use_empty())
1055     deleteAndRecombine(TLO.Old.getNode());
1056 }
1057 
1058 /// Check the specified integer node value to see if it can be simplified or if
1059 /// things it uses can be simplified by bit propagation. If so, return true.
1060 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1061                                        const APInt &DemandedElts) {
1062   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1063   KnownBits Known;
1064   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
1065     return false;
1066 
1067   // Revisit the node.
1068   AddToWorklist(Op.getNode());
1069 
1070   // Replace the old value with the new one.
1071   ++NodesCombined;
1072   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1073              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1074              dbgs() << '\n');
1075 
1076   CommitTargetLoweringOpt(TLO);
1077   return true;
1078 }
1079 
1080 /// Check the specified vector node value to see if it can be simplified or
1081 /// if things it uses can be simplified as it only uses some of the elements.
1082 /// If so, return true.
1083 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1084                                              const APInt &DemandedElts,
1085                                              bool AssumeSingleUse) {
1086   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1087   APInt KnownUndef, KnownZero;
1088   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1089                                       TLO, 0, AssumeSingleUse))
1090     return false;
1091 
1092   // Revisit the node.
1093   AddToWorklist(Op.getNode());
1094 
1095   // Replace the old value with the new one.
1096   ++NodesCombined;
1097   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1098              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1099              dbgs() << '\n');
1100 
1101   CommitTargetLoweringOpt(TLO);
1102   return true;
1103 }
1104 
1105 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1106   SDLoc DL(Load);
1107   EVT VT = Load->getValueType(0);
1108   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1109 
1110   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1111              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1112   WorklistRemover DeadNodes(*this);
1113   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1114   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1115   deleteAndRecombine(Load);
1116   AddToWorklist(Trunc.getNode());
1117 }
1118 
/// Rebuild \p Op as an equivalent value of the wider type \p PVT.
/// Sets \p Replace to true when a load was re-issued and the caller must
/// replace the original load (value and chain) in the DAG.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load is re-issued as an any-extending load of the
    // wider type; an extending load keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Promote the asserted value signed, keeping the assertion type operand.
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  // Fall back to any-extending the value when the target supports it.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1155 
1156 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1157   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1158     return SDValue();
1159   EVT OldVT = Op.getValueType();
1160   SDLoc DL(Op);
1161   bool Replace = false;
1162   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1163   if (!NewOp.getNode())
1164     return SDValue();
1165   AddToWorklist(NewOp.getNode());
1166 
1167   if (Replace)
1168     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1169   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1170                      DAG.getValueType(OldVT));
1171 }
1172 
1173 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1174   EVT OldVT = Op.getValueType();
1175   SDLoc DL(Op);
1176   bool Replace = false;
1177   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1178   if (!NewOp.getNode())
1179     return SDValue();
1180   AddToWorklist(NewOp.getNode());
1181 
1182   if (Replace)
1183     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1184   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1185 }
1186 
1187 /// Promote the specified integer binary operation if the target indicates it is
1188 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1189 /// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer operations are promoted here.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    // Widen both operands. ReplaceN records whether PromoteOperand rebuilt
    // a load that must itself be replaced in the DAG.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Perform the operation in the wider type, then truncate back to VT.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    // Returning Op (== N) signals CombineTo-style handling to the caller.
    return Op;
  }
  return SDValue();
}
1251 
1252 /// Promote the specified integer shift operation if the target indicates it is
1253 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1254 /// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer shifts are promoted here.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    // Only the shifted value is widened; the shift amount N1 is left as-is.
    // Right shifts must extend in a way that preserves the shifted-in bits:
    // sign-extend for SRA, zero-extend for SRL.
    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    // Shift in the wider type, then truncate back to VT.
    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1303 
1304 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1305   if (!LegalOperations)
1306     return SDValue();
1307 
1308   EVT VT = Op.getValueType();
1309   if (VT.isVector() || !VT.isInteger())
1310     return SDValue();
1311 
1312   // If operation type is 'undesirable', e.g. i16 on x86, consider
1313   // promoting it.
1314   unsigned Opc = Op.getOpcode();
1315   if (TLI.isTypeDesirableForOp(Opc, VT))
1316     return SDValue();
1317 
1318   EVT PVT = VT;
1319   // Consult target whether it is a good idea to promote this operation and
1320   // what's the right type to promote it to.
1321   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1322     assert(PVT != VT && "Don't know what type to promote to!");
1323     // fold (aext (aext x)) -> (aext x)
1324     // fold (aext (zext x)) -> (zext x)
1325     // fold (aext (sext x)) -> (sext x)
1326     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1327     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1328   }
1329   return SDValue();
1330 }
1331 
/// Widen an integer load to a type the target prefers, truncating the
/// loaded value back for existing users. Returns true if the DAG changed.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  // Only plain (unindexed) loads are handled here.
  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes an any-extending load of the wider type;
    // an extending load keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    // Existing users see a truncate back to the original narrow type.
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    // Rewire both results of the old load (value and chain), then delete it.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1377 
1378 /// Recursively delete a node which has no uses and any operands for
1379 /// which it is the only use.
1380 ///
1381 /// Note that this both deletes the nodes and removes them from the worklist.
1382 /// It also adds any nodes who have had a user deleted to the worklist as they
1383 /// may now have only one use and subject to other combines.
1384 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1385   if (!N->use_empty())
1386     return false;
1387 
1388   SmallSetVector<SDNode *, 16> Nodes;
1389   Nodes.insert(N);
1390   do {
1391     N = Nodes.pop_back_val();
1392     if (!N)
1393       continue;
1394 
1395     if (N->use_empty()) {
1396       for (const SDValue &ChildN : N->op_values())
1397         Nodes.insert(ChildN.getNode());
1398 
1399       removeFromWorklist(N);
1400       DAG.DeleteNode(N);
1401     } else {
1402       AddToWorklist(N);
1403     }
1404   } while (!Nodes.empty());
1405   return true;
1406 }
1407 
1408 //===----------------------------------------------------------------------===//
1409 //  Main DAG Combiner implementation
1410 //===----------------------------------------------------------------------===//
1411 
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalDAG = Level >= AfterLegalizeDAG;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Listener that routes every newly inserted node to ConsiderForPruning.
  WorklistInserter AddNodes(*this);

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (LegalDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      // Nodes touched by re-legalization need another look (with their users).
      for (SDNode *LN : UpdatedNodes)
        AddToWorklistWithUsers(LN);

      // Legalization replaced N entirely; nothing left to combine here.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // No combine fired for this node.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    // Replace N with RV. If the value counts differ, RV must be a
    // single-value replacement for N's first result.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1506 
/// Dispatch \p N to the opcode-specific combine routine.
///
/// Related opcodes that share one implementation (e.g. signed/unsigned
/// saturating ops, the VECREDUCE family) fall through to a common visitor.
/// Returns the replacement value produced by the visitor, or a null SDValue
/// when the opcode has no visitor (the default case) or the visitor made no
/// change.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT:            return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT:            return visitSUBSAT(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SADDO:
  case ISD::UADDO:              return visitADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::SSUBO:
  case ISD::USUBO:              return visitSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:         return visitMULFIX(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:
  case ISD::UMULO:              return visitMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR:               return visitFunnelShift(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::FPOW:               return visitFPOW(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FMINIMUM:           return visitFMINIMUM(N);
  case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
  }
  // Opcode has no generic visitor; signal "no change" to the caller, which
  // may still run target-specific combines (see DAGCombiner::combine).
  return SDValue();
}
1640 
1641 SDValue DAGCombiner::combine(SDNode *N) {
1642   SDValue RV = visit(N);
1643 
1644   // If nothing happened, try a target-specific DAG combine.
1645   if (!RV.getNode()) {
1646     assert(N->getOpcode() != ISD::DELETED_NODE &&
1647            "Node was deleted but visit returned NULL!");
1648 
1649     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1650         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1651 
1652       // Expose the DAG combiner to the target combiner impls.
1653       TargetLowering::DAGCombinerInfo
1654         DagCombineInfo(DAG, Level, false, this);
1655 
1656       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1657     }
1658   }
1659 
1660   // If nothing happened still, try promoting the operation.
1661   if (!RV.getNode()) {
1662     switch (N->getOpcode()) {
1663     default: break;
1664     case ISD::ADD:
1665     case ISD::SUB:
1666     case ISD::MUL:
1667     case ISD::AND:
1668     case ISD::OR:
1669     case ISD::XOR:
1670       RV = PromoteIntBinOp(SDValue(N, 0));
1671       break;
1672     case ISD::SHL:
1673     case ISD::SRA:
1674     case ISD::SRL:
1675       RV = PromoteIntShiftOp(SDValue(N, 0));
1676       break;
1677     case ISD::SIGN_EXTEND:
1678     case ISD::ZERO_EXTEND:
1679     case ISD::ANY_EXTEND:
1680       RV = PromoteExtend(SDValue(N, 0));
1681       break;
1682     case ISD::LOAD:
1683       if (PromoteLoad(SDValue(N, 0)))
1684         RV = SDValue(N, 0);
1685       break;
1686     }
1687   }
1688 
1689   // If N is a commutative binary node, try to eliminate it if the commuted
1690   // version is already present in the DAG.
1691   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1692       N->getNumValues() == 1) {
1693     SDValue N0 = N->getOperand(0);
1694     SDValue N1 = N->getOperand(1);
1695 
1696     // Constant operands are canonicalized to RHS.
1697     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1698       SDValue Ops[] = {N1, N0};
1699       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1700                                             N->getFlags());
1701       if (CSENode)
1702         return SDValue(CSENode, 0);
1703     }
1704   }
1705 
1706   return RV;
1707 }
1708 
1709 /// Given a node, return its input chain if it has one, otherwise return a null
1710 /// sd operand.
1711 static SDValue getInputChainForNode(SDNode *N) {
1712   if (unsigned NumOps = N->getNumOperands()) {
1713     if (N->getOperand(0).getValueType() == MVT::Other)
1714       return N->getOperand(0);
1715     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1716       return N->getOperand(NumOps-1);
1717     for (unsigned i = 1; i < NumOps-1; ++i)
1718       if (N->getOperand(i).getValueType() == MVT::Other)
1719         return N->getOperand(i);
1720   }
1721   return SDValue();
1722 }
1723 
/// Simplify a TokenFactor node. This (1) removes trivially redundant chain
/// operands, (2) inlines single-use TokenFactor operands into this node, and
/// (3) prunes operands whose chain is transitively reached through another
/// operand, since the other operand already orders them.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  // If the sole user is a token factor, we should make sure we have a
  // chance to merge them together. This prevents TF chains from inhibiting
  // optimizations.
  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
    AddToWorklist(*(N->use_begin()));

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps; // Ops already placed in Ops (dedup).
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    // Limit number of nodes to inline, to avoid quadratic compile times.
    // We have to add the outstanding Token Factors to Ops, otherwise we might
    // drop Ops from the resulting Token Factors.
    if (Ops.size() > TokenFactorInlineLimit) {
      for (unsigned j = i; j < TFs.size(); j++)
        Ops.emplace_back(TFs[j], 0);
      // Drop unprocessed Token Factors from TFs, so we do not add them to the
      // combiner worklist later.
      TFs.resize(i);
      break;
    }

    SDNode *TF = TFs[i];
    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Re-visit inlined Token Factors, to clean them up in case they have been
  // removed. Skip the first Token Factor, as this is the current node.
  for (unsigned i = 1, e = TFs.size(); i < e; i++)
    AddToWorklist(TFs[i]);

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains; // Chain nodes already queued for search.
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  // Seed the search with each Op, tagged with its own index so work can be
  // attributed back to the Op it started from.
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // Bounded breadth-first walk (at most 1024 steps) up the chain operands.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops for the search to be worthwhile;
    // with fewer, no further pruning is possible.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting
      // another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END:
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      // These have a single chain operand at index 0; keep climbing through it.
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only Ops that the chain walk never reached; any Op recorded in
        // SeenChains is reachable through another Op and hence redundant.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
      } else {
        Result = DAG.getTokenFactor(SDLoc(N), Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
1907 
1908 /// MERGE_VALUES can always be eliminated.
1909 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1910   WorklistRemover DeadNodes(*this);
1911   // Replacing results may cause a different MERGE_VALUES to suddenly
1912   // be CSE'd with N, and carry its uses with it. Iterate until no
1913   // uses remain, to ensure that the node can be safely deleted.
1914   // First add the users of this node to the work list so that they
1915   // can be tried again once they have new operands.
1916   AddUsersToWorklist(N);
1917   do {
1918     // Do as a single replacement to avoid rewalking use lists.
1919     SmallVector<SDValue, 8> Ops;
1920     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1921       Ops.push_back(N->getOperand(i));
1922     DAG.ReplaceAllUsesWith(N, Ops.data());
1923   } while (!N->use_empty());
1924   deleteAndRecombine(N);
1925   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1926 }
1927 
1928 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1929 /// ConstantSDNode pointer else nullptr.
1930 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1931   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1932   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1933 }
1934 
/// Try to pull a binary operator into a select-of-constants operand:
///   binop (select Cond, CT, CF), CBO --> select Cond, binop(CT, CBO),
///                                                     binop(CF, CBO)
/// This eliminates the binop entirely when both folded arms stay constant
/// (or, for and/or with 0/-1 arms, when the non-constant operand can be
/// propagated into the select). Returns the new select, or a null SDValue.
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
         "Unexpected binary operator");

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  // The select may be either operand of the binop; try operand 0 first, then
  // operand 1. SelOpNo records which side it was found on.
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  // Both arms of the select must be constants (int or FP).
  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      !isConstantFPBuildVectorOrConstantFP(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      !isConstantFPBuildVectorOrConstantFP(CF))
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  auto BinOpcode = BO->getOpcode();
  bool CanFoldNonConst =
      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
      (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
      (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));

  // CBO is the binop operand opposite the select.
  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      !isConstantFPBuildVectorOrConstantFP(CBO))
    return SDValue();

  EVT VT = Sel.getValueType();

  // In case of shift value and shift amount may have different VT. For instance
  // on x86 shift amount is i8 regardless of LHS type. Bail out if we have
  // swapped operands and value types do not match. NB: x86 is fine if operands
  // are not swapped with shift amount VT being not bigger than shifted value.
  // TODO: that is possible to check for a shift operation, correct VTs and
  // still perform optimization on x86 if needed.
  if (SelOpNo && VT != CBO.getValueType())
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  SDLoc DL(Sel);
  // Preserve operand order for non-commutative binops (sub, shifts, ...).
  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
  if (!CanFoldNonConst && !NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCT))
    return SDValue();

  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
  if (!CanFoldNonConst && !NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCF))
    return SDValue();

  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
  SelectOp->setFlags(BO->getFlags());
  return SelectOp;
}
2012 
2013 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2014   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2015          "Expecting add or sub");
2016 
2017   // Match a constant operand and a zext operand for the math instruction:
2018   // add Z, C
2019   // sub C, Z
2020   bool IsAdd = N->getOpcode() == ISD::ADD;
2021   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2022   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2023   auto *CN = dyn_cast<ConstantSDNode>(C);
2024   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2025     return SDValue();
2026 
2027   // Match the zext operand as a setcc of a boolean.
2028   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2029       Z.getOperand(0).getValueType() != MVT::i1)
2030     return SDValue();
2031 
2032   // Match the compare as: setcc (X & 1), 0, eq.
2033   SDValue SetCC = Z.getOperand(0);
2034   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2035   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2036       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2037       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2038     return SDValue();
2039 
2040   // We are adding/subtracting a constant and an inverted low bit. Turn that
2041   // into a subtract/add of the low bit with incremented/decremented constant:
2042   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2043   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2044   EVT VT = C.getValueType();
2045   SDLoc DL(N);
2046   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2047   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2048                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2049   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2050 }
2051 
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
/// a shift and add with a different constant.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // We need a constant operand for the add/sub, and the other operand is a
  // logical shift right: add (srl), C or sub C, (srl).
  // TODO - support non-uniform vector amounts.
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
  if (!C || ShiftOp.getOpcode() != ISD::SRL)
    return SDValue();

  // The shift must be of a 'not' value.
  SDValue Not = ShiftOp.getOperand(0);
  if (!Not.hasOneUse() || !isBitwiseNot(Not))
    return SDValue();

  // The shift must be moving the sign bit to the least-significant-bit.
  EVT VT = ShiftOp.getValueType();
  SDValue ShAmt = ShiftOp.getOperand(1);
  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
    return SDValue();

  // Eliminate the 'not' by adjusting the shift and add/sub constant:
  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
  // This works because srl (not X), 31 == 1 - srl X, 31; for the add case
  // the negated low bit is expressed via sra (which yields 0 or -1), and for
  // the sub case the subtraction flips into an add of srl X, 31.
  SDLoc DL(N);
  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
}
2089 
2090 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2091 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2092 /// are no common bits set in the operands).
2093 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2094   SDValue N0 = N->getOperand(0);
2095   SDValue N1 = N->getOperand(1);
2096   EVT VT = N0.getValueType();
2097   SDLoc DL(N);
2098 
2099   // fold vector ops
2100   if (VT.isVector()) {
2101     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2102       return FoldedVOp;
2103 
2104     // fold (add x, 0) -> x, vector edition
2105     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2106       return N0;
2107     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2108       return N1;
2109   }
2110 
2111   // fold (add x, undef) -> undef
2112   if (N0.isUndef())
2113     return N0;
2114 
2115   if (N1.isUndef())
2116     return N1;
2117 
2118   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2119     // canonicalize constant to RHS
2120     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2121       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2122     // fold (add c1, c2) -> c1+c2
2123     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2124   }
2125 
2126   // fold (add x, 0) -> x
2127   if (isNullConstant(N1))
2128     return N0;
2129 
2130   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2131     // fold ((A-c1)+c2) -> (A+(c2-c1))
2132     if (N0.getOpcode() == ISD::SUB &&
2133         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2134       SDValue Sub =
2135           DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2136       assert(Sub && "Constant folding failed");
2137       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2138     }
2139 
2140     // fold ((c1-A)+c2) -> (c1+c2)-A
2141     if (N0.getOpcode() == ISD::SUB &&
2142         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2143       SDValue Add =
2144           DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2145       assert(Add && "Constant folding failed");
2146       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2147     }
2148 
2149     // add (sext i1 X), 1 -> zext (not i1 X)
2150     // We don't transform this pattern:
2151     //   add (zext i1 X), -1 -> sext (not i1 X)
2152     // because most (?) targets generate better code for the zext form.
2153     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2154         isOneOrOneSplat(N1)) {
2155       SDValue X = N0.getOperand(0);
2156       if ((!LegalOperations ||
2157            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2158             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2159           X.getScalarValueSizeInBits() == 1) {
2160         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2161         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2162       }
2163     }
2164 
2165     // Undo the add -> or combine to merge constant offsets from a frame index.
2166     if (N0.getOpcode() == ISD::OR &&
2167         isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2168         isa<ConstantSDNode>(N0.getOperand(1)) &&
2169         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2170       SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2171       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2172     }
2173   }
2174 
2175   if (SDValue NewSel = foldBinOpIntoSelect(N))
2176     return NewSel;
2177 
2178   // reassociate add
2179   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2180     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2181       return RADD;
2182   }
2183   // fold ((0-A) + B) -> B-A
2184   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2185     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2186 
2187   // fold (A + (0-B)) -> A-B
2188   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2189     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2190 
2191   // fold (A+(B-A)) -> B
2192   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2193     return N1.getOperand(0);
2194 
2195   // fold ((B-A)+A) -> B
2196   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2197     return N0.getOperand(0);
2198 
2199   // fold ((A-B)+(C-A)) -> (C-B)
2200   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2201       N0.getOperand(0) == N1.getOperand(1))
2202     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2203                        N0.getOperand(1));
2204 
2205   // fold ((A-B)+(B-C)) -> (A-C)
2206   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2207       N0.getOperand(1) == N1.getOperand(0))
2208     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2209                        N1.getOperand(1));
2210 
2211   // fold (A+(B-(A+C))) to (B-C)
2212   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2213       N0 == N1.getOperand(1).getOperand(0))
2214     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2215                        N1.getOperand(1).getOperand(1));
2216 
2217   // fold (A+(B-(C+A))) to (B-C)
2218   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2219       N0 == N1.getOperand(1).getOperand(1))
2220     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2221                        N1.getOperand(1).getOperand(0));
2222 
2223   // fold (A+((B-A)+or-C)) to (B+or-C)
2224   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2225       N1.getOperand(0).getOpcode() == ISD::SUB &&
2226       N0 == N1.getOperand(0).getOperand(1))
2227     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2228                        N1.getOperand(1));
2229 
2230   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2231   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2232     SDValue N00 = N0.getOperand(0);
2233     SDValue N01 = N0.getOperand(1);
2234     SDValue N10 = N1.getOperand(0);
2235     SDValue N11 = N1.getOperand(1);
2236 
2237     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2238       return DAG.getNode(ISD::SUB, DL, VT,
2239                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2240                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2241   }
2242 
2243   // fold (add (umax X, C), -C) --> (usubsat X, C)
2244   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2245     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2246       return (!Max && !Op) ||
2247              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2248     };
2249     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2250                                   /*AllowUndefs*/ true))
2251       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2252                          N0.getOperand(1));
2253   }
2254 
2255   if (SimplifyDemandedBits(SDValue(N, 0)))
2256     return SDValue(N, 0);
2257 
2258   if (isOneOrOneSplat(N1)) {
2259     // fold (add (xor a, -1), 1) -> (sub 0, a)
2260     if (isBitwiseNot(N0))
2261       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2262                          N0.getOperand(0));
2263 
2264     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2265     if (N0.getOpcode() == ISD::ADD ||
2266         N0.getOpcode() == ISD::UADDO ||
2267         N0.getOpcode() == ISD::SADDO) {
2268       SDValue A, Xor;
2269 
2270       if (isBitwiseNot(N0.getOperand(0))) {
2271         A = N0.getOperand(1);
2272         Xor = N0.getOperand(0);
2273       } else if (isBitwiseNot(N0.getOperand(1))) {
2274         A = N0.getOperand(0);
2275         Xor = N0.getOperand(1);
2276       }
2277 
2278       if (Xor)
2279         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2280     }
2281 
2282     // Look for:
2283     //   add (add x, y), 1
2284     // And if the target does not like this form then turn into:
2285     //   sub y, (xor x, -1)
2286     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2287         N0.getOpcode() == ISD::ADD) {
2288       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2289                                 DAG.getAllOnesConstant(DL, VT));
2290       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2291     }
2292   }
2293 
2294   // (x - y) + -1  ->  add (xor y, -1), x
2295   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2296       isAllOnesOrAllOnesSplat(N1)) {
2297     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2298     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2299   }
2300 
2301   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2302     return Combined;
2303 
2304   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2305     return Combined;
2306 
2307   return SDValue();
2308 }
2309 
// Combine an ISD::ADD node. Runs the folds shared with other add-like opcodes
// first, then ADD-only transforms (masked-bool folds, sign-bit folds, and the
// add->or conversion for operands with disjoint bits).
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // Folds shared between ADD and the carry-producing add opcodes.
  if (SDValue Combined = visitADDLike(N))
    return Combined;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  return SDValue();
}
2332 
// Combine SADDSAT/UADDSAT nodes: identity folds, constant folding, and
// (unsigned only) conversion to a plain ADD when overflow is provably
// impossible.
SDValue DAGCombiner::visitADDSAT(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    // TODO SimplifyVBinOp

    // fold (add_sat x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add_sat x, undef) -> -1
  if (N0.isUndef() || N1.isUndef())
    return DAG.getAllOnesConstant(DL, VT);

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(Opcode, DL, VT, N1, N0);
    // fold (add_sat c1, c2) -> c3
    return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
  }

  // fold (add_sat x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // If it cannot overflow, transform into an add.
  if (Opcode == ISD::UADDSAT)
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADD, DL, VT, N0, N1);

  return SDValue();
}
2374 
2375 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2376   bool Masked = false;
2377 
2378   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2379   while (true) {
2380     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2381       V = V.getOperand(0);
2382       continue;
2383     }
2384 
2385     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2386       Masked = true;
2387       V = V.getOperand(0);
2388       continue;
2389     }
2390 
2391     break;
2392   }
2393 
2394   // If this is not a carry, return.
2395   if (V.getResNo() != 1)
2396     return SDValue();
2397 
2398   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2399       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2400     return SDValue();
2401 
2402   EVT VT = V.getNode()->getValueType(0);
2403   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2404     return SDValue();
2405 
2406   // If the result is masked, then no matter what kind of bool it is we can
2407   // return. If it isn't, then we need to make sure the bool type is either 0 or
2408   // 1 and not other values.
2409   if (Masked ||
2410       TLI.getBooleanContents(V.getValueType()) ==
2411           TargetLoweringBase::ZeroOrOneBooleanContent)
2412     return V;
2413 
2414   return SDValue();
2415 }
2416 
2417 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2418 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2419 /// the opcode and bypass the mask operation.
2420 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2421                                  SelectionDAG &DAG, const SDLoc &DL) {
2422   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2423     return SDValue();
2424 
2425   EVT VT = N0.getValueType();
2426   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2427     return SDValue();
2428 
2429   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2430   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2431   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2432 }
2433 
/// Helper for doing combines based on N0 and N1 being added to each other.
/// The ADD combiners call this twice, once per operand order, so each fold
/// below only needs to match one orientation.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
                                          SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
    return V;

  // Look for:
  //   add (add x, 1), y
  // And if the target does not like this form then turn into:
  //   sub y, (xor x, -1)
  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
      N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
    SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                              DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
  }

  // Hoist one-use subtraction by non-opaque constant:
  //   (x - C) + y  ->  (x + y) - C
  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
  }
  // Hoist one-use subtraction from non-opaque constant:
  //   (C - x) + y  ->  (y - x) + C
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
  }

  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
  // rather than 'add 0/-1' (the zext should get folded).
  // add (sext i1 Y), X --> sub X, (zext i1 Y)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
      TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  // (Sign-extending an i1 in-register yields 0/-1, so adding it is the same
  // as subtracting the masked 0/1 value.)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}
2513 
// Combine an ISD::ADDC node: if its glue carry-out is unused, lower it to a
// plain ADD; otherwise canonicalize and fold the trivial/no-overflow cases.
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        DL, MVT::Glue));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}
2543 
2544 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2545                            SelectionDAG &DAG, const TargetLowering &TLI) {
2546   EVT VT = V.getValueType();
2547 
2548   SDValue Cst;
2549   switch (TLI.getBooleanContents(VT)) {
2550   case TargetLowering::ZeroOrOneBooleanContent:
2551   case TargetLowering::UndefinedBooleanContent:
2552     Cst = DAG.getConstant(1, DL, VT);
2553     break;
2554   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2555     Cst = DAG.getAllOnesConstant(DL, VT);
2556     break;
2557   }
2558 
2559   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2560 }
2561 
2562 /**
2563  * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2564  * then the flip also occurs if computing the inverse is the same cost.
2565  * This function returns an empty SDValue in case it cannot flip the boolean
2566  * without increasing the cost of the computation. If you want to flip a boolean
2567  * no matter what, use flipBoolean.
2568  */
2569 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2570                                   const TargetLowering &TLI,
2571                                   bool Force) {
2572   if (Force && isa<ConstantSDNode>(V))
2573     return flipBoolean(V, SDLoc(V), DAG, TLI);
2574 
2575   if (V.getOpcode() != ISD::XOR)
2576     return SDValue();
2577 
2578   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2579   if (!Const)
2580     return SDValue();
2581 
2582   EVT VT = V.getValueType();
2583 
2584   bool IsFlip = false;
2585   switch(TLI.getBooleanContents(VT)) {
2586     case TargetLowering::ZeroOrOneBooleanContent:
2587       IsFlip = Const->isOne();
2588       break;
2589     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2590       IsFlip = Const->isAllOnesValue();
2591       break;
2592     case TargetLowering::UndefinedBooleanContent:
2593       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2594       break;
2595   }
2596 
2597   if (IsFlip)
2598     return V.getOperand(0);
2599   if (Force)
2600     return flipBoolean(V, SDLoc(V), DAG, TLI);
2601   return SDValue();
2602 }
2603 
// Combine SADDO/UADDO nodes: drop the overflow flag when it is dead,
// canonicalize, fold trivial cases, and (unsigned only) strength-reduce when
// overflow is provably impossible.
SDValue DAGCombiner::visitADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SADDO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);

  // fold (addo x, 0) -> x + no carry out
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  if (!IsSigned) {
    // If it cannot overflow, transform into an add.
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
      return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                       DAG.getConstant(0, DL, CarryVT));

    // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
    if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
      SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                                DAG.getConstant(0, DL, VT), N0.getOperand(0));
      return CombineTo(N, Sub,
                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
    }

    // Try the UADDO-specific folds with the operands in both orders.
    if (SDValue Combined = visitUADDOLike(N0, N1, N))
      return Combined;

    if (SDValue Combined = visitUADDOLike(N1, N0, N))
      return Combined;
  }

  return SDValue();
}
2650 
2651 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2652   EVT VT = N0.getValueType();
2653   if (VT.isVector())
2654     return SDValue();
2655 
2656   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2657   // If Y + 1 cannot overflow.
2658   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2659     SDValue Y = N1.getOperand(0);
2660     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2661     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2662       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2663                          N1.getOperand(2));
2664   }
2665 
2666   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2667   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2668     if (SDValue Carry = getAsCarry(TLI, N1))
2669       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2670                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2671 
2672   return SDValue();
2673 }
2674 
2675 SDValue DAGCombiner::visitADDE(SDNode *N) {
2676   SDValue N0 = N->getOperand(0);
2677   SDValue N1 = N->getOperand(1);
2678   SDValue CarryIn = N->getOperand(2);
2679 
2680   // canonicalize constant to RHS
2681   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2682   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2683   if (N0C && !N1C)
2684     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2685                        N1, N0, CarryIn);
2686 
2687   // fold (adde x, y, false) -> (addc x, y)
2688   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2689     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2690 
2691   return SDValue();
2692 }
2693 
// Combine an ISD::ADDCARRY node: canonicalize, fold away a known-false or
// fully-constant carry chain, then try the deeper carry-propagation folds.
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
  }

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    EVT CarryVT = CarryIn.getValueType();
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  // Try the carry-diamond folds with the addends in both orders.
  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}
2732 
2733 /**
2734  * If we are facing some sort of diamond carry propapagtion pattern try to
2735  * break it up to generate something like:
2736  *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2737  *
2738  * The end result is usually an increase in operation required, but because the
2739  * carry is now linearized, other tranforms can kick in and optimize the DAG.
2740  *
2741  * Patterns typically look something like
2742  *            (uaddo A, B)
2743  *             /       \
2744  *          Carry      Sum
2745  *            |          \
2746  *            | (addcarry *, 0, Z)
2747  *            |       /
2748  *             \   Carry
2749  *              |   /
2750  * (addcarry X, *, *)
2751  *
2752  * But numerous variation exist. Our goal is to identify A, B, X and Z and
2753  * produce a combine with a single path for carry propagation.
2754  */
2755 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2756                                       SDValue X, SDValue Carry0, SDValue Carry1,
2757                                       SDNode *N) {
2758   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2759     return SDValue();
2760   if (Carry1.getOpcode() != ISD::UADDO)
2761     return SDValue();
2762 
2763   SDValue Z;
2764 
2765   /**
2766    * First look for a suitable Z. It will present itself in the form of
2767    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2768    */
2769   if (Carry0.getOpcode() == ISD::ADDCARRY &&
2770       isNullConstant(Carry0.getOperand(1))) {
2771     Z = Carry0.getOperand(2);
2772   } else if (Carry0.getOpcode() == ISD::UADDO &&
2773              isOneConstant(Carry0.getOperand(1))) {
2774     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2775     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2776   } else {
2777     // We couldn't find a suitable Z.
2778     return SDValue();
2779   }
2780 
2781 
2782   auto cancelDiamond = [&](SDValue A,SDValue B) {
2783     SDLoc DL(N);
2784     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2785     Combiner.AddToWorklist(NewY.getNode());
2786     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2787                        DAG.getConstant(0, DL, X.getValueType()),
2788                        NewY.getValue(1));
2789   };
2790 
2791   /**
2792    *      (uaddo A, B)
2793    *           |
2794    *          Sum
2795    *           |
2796    * (addcarry *, 0, Z)
2797    */
2798   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2799     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2800   }
2801 
2802   /**
2803    * (addcarry A, 0, Z)
2804    *         |
2805    *        Sum
2806    *         |
2807    *  (uaddo *, B)
2808    */
2809   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2810     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2811   }
2812 
2813   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2814     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2815   }
2816 
2817   return SDValue();
2818 }
2819 
// If we are facing some sort of diamond carry/borrow in/out pattern try to
// match patterns like:
//
//          (uaddo A, B)            CarryIn
//            |  \                     |
//            |   \                    |
//    PartialSum   PartialCarryOutX   /
//            |        |             /
//            |    ____|____________/
//            |   /    |
//     (uaddo *, *)    \________
//       |  \                   \
//       |   \                   |
//       |    PartialCarryOutY   |
//       |        \              |
//       |         \            /
//   AddCarrySum    |    ______/
//                  |   /
//   CarryOut = (or *, *)
//
// And generate ADDCARRY (or SUBCARRY) with two result values:
//
//    {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
//
// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
// a single path for carry/borrow out propagation:
static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
                                   const TargetLowering &TLI, SDValue Carry0,
                                   SDValue Carry1, SDNode *N) {
  // Both inputs must be carry/borrow-out results (result number 1) of the
  // same unsigned overflow opcode.
  if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
    return SDValue();
  unsigned Opcode = Carry0.getOpcode();
  if (Opcode != Carry1.getOpcode())
    return SDValue();
  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
    return SDValue();

  // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
  // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
  // the above ASCII art.)
  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
      Carry1.getOperand(1) != Carry0.getValue(0))
    std::swap(Carry0, Carry1);
  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
      Carry1.getOperand(1) != Carry0.getValue(0))
    return SDValue();

  // The carry in value must be on the righthand side for subtraction.
  unsigned CarryInOperandNum =
      Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
    return SDValue();
  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);

  unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
    return SDValue();

  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
  // TODO: make getAsCarry() aware of how partial carries are merged.
  if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();
  CarryIn = CarryIn.getOperand(0);
  if (CarryIn.getValueType() != MVT::i1)
    return SDValue();

  SDLoc DL(N);
  SDValue Merged =
      DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
                  Carry0.getOperand(1), CarryIn);

  // Please note that because we have proven that the result of the UADDO/USUBO
  // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
  // therefore prove that if the first UADDO/USUBO overflows, the second
  // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
  // maximum value.
  //
  //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
  //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
  //
  // This is important because it means that OR and XOR can be used to merge
  // carry flags; and that AND can return a constant zero.
  //
  // TODO: match other operations that can merge flags (ADD, etc)
  // Reroute users of the second partial sum to the merged node's sum result.
  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
  if (N->getOpcode() == ISD::AND)
    return DAG.getConstant(0, DL, MVT::i1);
  return Merged.getValue(1);
}
2909 
// ADDCARRY folds for one particular operand order; visitADDCARRY calls this
// with both (N0, N1) and (N1, N0).
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
  if (isBitwiseNot(N0))
    if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
      SDLoc DL(N);
      SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
                                N0.getOperand(0), NotC);
      return CombineTo(N, Sub,
                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
    }

  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
  // or the dependency between the instructions.
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
        N0.getValue(1) != CarryIn)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry argument is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    // Because both are carries, Y and Z can be swapped.
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
      return R;
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
      return R;
  }

  return SDValue();
}
2948 
2949 // Since it may not be valid to emit a fold to zero for vector initializers
2950 // check if we can before folding.
2951 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2952                              SelectionDAG &DAG, bool LegalOperations) {
2953   if (!VT.isVector())
2954     return DAG.getConstant(0, DL, VT);
2955   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2956     return DAG.getConstant(0, DL, VT);
2957   return SDValue();
2958 }
2959 
2960 SDValue DAGCombiner::visitSUB(SDNode *N) {
2961   SDValue N0 = N->getOperand(0);
2962   SDValue N1 = N->getOperand(1);
2963   EVT VT = N0.getValueType();
2964   SDLoc DL(N);
2965 
2966   // fold vector ops
2967   if (VT.isVector()) {
2968     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2969       return FoldedVOp;
2970 
2971     // fold (sub x, 0) -> x, vector edition
2972     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2973       return N0;
2974   }
2975 
2976   // fold (sub x, x) -> 0
2977   // FIXME: Refactor this and xor and other similar operations together.
2978   if (N0 == N1)
2979     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2980 
2981   // fold (sub c1, c2) -> c3
2982   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
2983     return C;
2984 
2985   if (SDValue NewSel = foldBinOpIntoSelect(N))
2986     return NewSel;
2987 
2988   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2989 
2990   // fold (sub x, c) -> (add x, -c)
2991   if (N1C) {
2992     return DAG.getNode(ISD::ADD, DL, VT, N0,
2993                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2994   }
2995 
2996   if (isNullOrNullSplat(N0)) {
2997     unsigned BitWidth = VT.getScalarSizeInBits();
2998     // Right-shifting everything out but the sign bit followed by negation is
2999     // the same as flipping arithmetic/logical shift type without the negation:
3000     // -(X >>u 31) -> (X >>s 31)
3001     // -(X >>s 31) -> (X >>u 31)
3002     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3003       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3004       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3005         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3006         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3007           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3008       }
3009     }
3010 
3011     // 0 - X --> 0 if the sub is NUW.
3012     if (N->getFlags().hasNoUnsignedWrap())
3013       return N0;
3014 
3015     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3016       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3017       // N1 must be 0 because negating the minimum signed value is undefined.
3018       if (N->getFlags().hasNoSignedWrap())
3019         return N0;
3020 
3021       // 0 - X --> X if X is 0 or the minimum signed value.
3022       return N1;
3023     }
3024   }
3025 
3026   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3027   if (isAllOnesOrAllOnesSplat(N0))
3028     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3029 
3030   // fold (A - (0-B)) -> A+B
3031   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3032     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3033 
3034   // fold A-(A-B) -> B
3035   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3036     return N1.getOperand(1);
3037 
3038   // fold (A+B)-A -> B
3039   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3040     return N0.getOperand(1);
3041 
3042   // fold (A+B)-B -> A
3043   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3044     return N0.getOperand(0);
3045 
3046   // fold (A+C1)-C2 -> A+(C1-C2)
3047   if (N0.getOpcode() == ISD::ADD &&
3048       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3049       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3050     SDValue NewC =
3051         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3052     assert(NewC && "Constant folding failed");
3053     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3054   }
3055 
3056   // fold C2-(A+C1) -> (C2-C1)-A
3057   if (N1.getOpcode() == ISD::ADD) {
3058     SDValue N11 = N1.getOperand(1);
3059     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3060         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3061       SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3062       assert(NewC && "Constant folding failed");
3063       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3064     }
3065   }
3066 
3067   // fold (A-C1)-C2 -> A-(C1+C2)
3068   if (N0.getOpcode() == ISD::SUB &&
3069       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3070       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3071     SDValue NewC =
3072         DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3073     assert(NewC && "Constant folding failed");
3074     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3075   }
3076 
3077   // fold (c1-A)-c2 -> (c1-c2)-A
3078   if (N0.getOpcode() == ISD::SUB &&
3079       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3080       isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3081     SDValue NewC =
3082         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3083     assert(NewC && "Constant folding failed");
3084     return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3085   }
3086 
3087   // fold ((A+(B+or-C))-B) -> A+or-C
3088   if (N0.getOpcode() == ISD::ADD &&
3089       (N0.getOperand(1).getOpcode() == ISD::SUB ||
3090        N0.getOperand(1).getOpcode() == ISD::ADD) &&
3091       N0.getOperand(1).getOperand(0) == N1)
3092     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3093                        N0.getOperand(1).getOperand(1));
3094 
3095   // fold ((A+(C+B))-B) -> A+C
3096   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3097       N0.getOperand(1).getOperand(1) == N1)
3098     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3099                        N0.getOperand(1).getOperand(0));
3100 
3101   // fold ((A-(B-C))-C) -> A-B
3102   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3103       N0.getOperand(1).getOperand(1) == N1)
3104     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3105                        N0.getOperand(1).getOperand(0));
3106 
3107   // fold (A-(B-C)) -> A+(C-B)
3108   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3109     return DAG.getNode(ISD::ADD, DL, VT, N0,
3110                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3111                                    N1.getOperand(0)));
3112 
3113   // A - (A & B)  ->  A & (~B)
3114   if (N1.getOpcode() == ISD::AND) {
3115     SDValue A = N1.getOperand(0);
3116     SDValue B = N1.getOperand(1);
3117     if (A != N0)
3118       std::swap(A, B);
3119     if (A == N0 &&
3120         (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3121       SDValue InvB =
3122           DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3123       return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3124     }
3125   }
3126 
3127   // fold (X - (-Y * Z)) -> (X + (Y * Z))
3128   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3129     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3130         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3131       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3132                                 N1.getOperand(0).getOperand(1),
3133                                 N1.getOperand(1));
3134       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3135     }
3136     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3137         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3138       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3139                                 N1.getOperand(0),
3140                                 N1.getOperand(1).getOperand(1));
3141       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3142     }
3143   }
3144 
3145   // If either operand of a sub is undef, the result is undef
3146   if (N0.isUndef())
3147     return N0;
3148   if (N1.isUndef())
3149     return N1;
3150 
3151   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3152     return V;
3153 
3154   if (SDValue V = foldAddSubOfSignBit(N, DAG))
3155     return V;
3156 
3157   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3158     return V;
3159 
3160   // (x - y) - 1  ->  add (xor y, -1), x
3161   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3162     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3163                               DAG.getAllOnesConstant(DL, VT));
3164     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3165   }
3166 
3167   // Look for:
3168   //   sub y, (xor x, -1)
3169   // And if the target does not like this form then turn into:
3170   //   add (add x, y), 1
3171   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3172     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3173     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3174   }
3175 
3176   // Hoist one-use addition by non-opaque constant:
3177   //   (x + C) - y  ->  (x - y) + C
3178   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3179       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3180     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3181     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3182   }
3183   // y - (x + C)  ->  (y - x) - C
3184   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3185       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3186     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3187     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3188   }
3189   // (x - C) - y  ->  (x - y) - C
3190   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3191   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3192       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3193     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3194     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3195   }
3196   // (C - x) - y  ->  C - (x + y)
3197   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3198       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3199     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3200     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3201   }
3202 
3203   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3204   // rather than 'sub 0/1' (the sext should get folded).
3205   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3206   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3207       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3208       TLI.getBooleanContents(VT) ==
3209           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3210     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3211     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3212   }
3213 
3214   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3215   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3216     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3217       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3218       SDValue S0 = N1.getOperand(0);
3219       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
3220         unsigned OpSizeInBits = VT.getScalarSizeInBits();
3221         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3222           if (C->getAPIntValue() == (OpSizeInBits - 1))
3223             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3224       }
3225     }
3226   }
3227 
3228   // If the relocation model supports it, consider symbol offsets.
3229   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3230     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3231       // fold (sub Sym, c) -> Sym-c
3232       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3233         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3234                                     GA->getOffset() -
3235                                         (uint64_t)N1C->getSExtValue());
3236       // fold (sub Sym+c1, Sym+c2) -> c1-c2
3237       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3238         if (GA->getGlobal() == GB->getGlobal())
3239           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3240                                  DL, VT);
3241     }
3242 
3243   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3244   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3245     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3246     if (TN->getVT() == MVT::i1) {
3247       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3248                                  DAG.getConstant(1, DL, VT));
3249       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3250     }
3251   }
3252 
3253   // Prefer an add for more folding potential and possibly better codegen:
3254   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3255   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3256     SDValue ShAmt = N1.getOperand(1);
3257     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3258     if (ShAmtC &&
3259         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3260       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3261       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3262     }
3263   }
3264 
3265   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3266     // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
3267     if (SDValue Carry = getAsCarry(TLI, N0)) {
3268       SDValue X = N1;
3269       SDValue Zero = DAG.getConstant(0, DL, VT);
3270       SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3271       return DAG.getNode(ISD::ADDCARRY, DL,
3272                          DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3273                          Carry);
3274     }
3275   }
3276 
3277   return SDValue();
3278 }
3279 
3280 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3281   SDValue N0 = N->getOperand(0);
3282   SDValue N1 = N->getOperand(1);
3283   EVT VT = N0.getValueType();
3284   SDLoc DL(N);
3285 
3286   // fold vector ops
3287   if (VT.isVector()) {
3288     // TODO SimplifyVBinOp
3289 
3290     // fold (sub_sat x, 0) -> x, vector edition
3291     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3292       return N0;
3293   }
3294 
3295   // fold (sub_sat x, undef) -> 0
3296   if (N0.isUndef() || N1.isUndef())
3297     return DAG.getConstant(0, DL, VT);
3298 
3299   // fold (sub_sat x, x) -> 0
3300   if (N0 == N1)
3301     return DAG.getConstant(0, DL, VT);
3302 
3303   // fold (sub_sat c1, c2) -> c3
3304   if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3305     return C;
3306 
3307   // fold (sub_sat x, 0) -> x
3308   if (isNullConstant(N1))
3309     return N0;
3310 
3311   return SDValue();
3312 }
3313 
3314 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3315   SDValue N0 = N->getOperand(0);
3316   SDValue N1 = N->getOperand(1);
3317   EVT VT = N0.getValueType();
3318   SDLoc DL(N);
3319 
3320   // If the flag result is dead, turn this into an SUB.
3321   if (!N->hasAnyUseOfValue(1))
3322     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3323                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3324 
3325   // fold (subc x, x) -> 0 + no borrow
3326   if (N0 == N1)
3327     return CombineTo(N, DAG.getConstant(0, DL, VT),
3328                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3329 
3330   // fold (subc x, 0) -> x + no borrow
3331   if (isNullConstant(N1))
3332     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3333 
3334   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3335   if (isAllOnesConstant(N0))
3336     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3337                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3338 
3339   return SDValue();
3340 }
3341 
3342 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3343   SDValue N0 = N->getOperand(0);
3344   SDValue N1 = N->getOperand(1);
3345   EVT VT = N0.getValueType();
3346   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3347 
3348   EVT CarryVT = N->getValueType(1);
3349   SDLoc DL(N);
3350 
3351   // If the flag result is dead, turn this into an SUB.
3352   if (!N->hasAnyUseOfValue(1))
3353     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3354                      DAG.getUNDEF(CarryVT));
3355 
3356   // fold (subo x, x) -> 0 + no borrow
3357   if (N0 == N1)
3358     return CombineTo(N, DAG.getConstant(0, DL, VT),
3359                      DAG.getConstant(0, DL, CarryVT));
3360 
3361   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3362 
3363   // fold (subox, c) -> (addo x, -c)
3364   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3365     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3366                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3367   }
3368 
3369   // fold (subo x, 0) -> x + no borrow
3370   if (isNullOrNullSplat(N1))
3371     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3372 
3373   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3374   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3375     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3376                      DAG.getConstant(0, DL, CarryVT));
3377 
3378   return SDValue();
3379 }
3380 
3381 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3382   SDValue N0 = N->getOperand(0);
3383   SDValue N1 = N->getOperand(1);
3384   SDValue CarryIn = N->getOperand(2);
3385 
3386   // fold (sube x, y, false) -> (subc x, y)
3387   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3388     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3389 
3390   return SDValue();
3391 }
3392 
3393 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3394   SDValue N0 = N->getOperand(0);
3395   SDValue N1 = N->getOperand(1);
3396   SDValue CarryIn = N->getOperand(2);
3397 
3398   // fold (subcarry x, y, false) -> (usubo x, y)
3399   if (isNullConstant(CarryIn)) {
3400     if (!LegalOperations ||
3401         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3402       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3403   }
3404 
3405   return SDValue();
3406 }
3407 
3408 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3409 // UMULFIXSAT here.
3410 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3411   SDValue N0 = N->getOperand(0);
3412   SDValue N1 = N->getOperand(1);
3413   SDValue Scale = N->getOperand(2);
3414   EVT VT = N0.getValueType();
3415 
3416   // fold (mulfix x, undef, scale) -> 0
3417   if (N0.isUndef() || N1.isUndef())
3418     return DAG.getConstant(0, SDLoc(N), VT);
3419 
3420   // Canonicalize constant to RHS (vector doesn't have to splat)
3421   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3422      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3423     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3424 
3425   // fold (mulfix x, 0, scale) -> 0
3426   if (isNullConstant(N1))
3427     return DAG.getConstant(0, SDLoc(N), VT);
3428 
3429   return SDValue();
3430 }
3431 
SDValue DAGCombiner::visitMUL(SDNode *N) {
  // Combine an integer multiply: constant folding, canonicalization of
  // constants to the RHS, and strength reduction of multiplies by
  // (near-)power-of-two constants into shifts and add/sub.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  APInt ConstValue1;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // For vectors, a uniform constant splat is treated like a scalar
    // constant below.
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      // Opaque constants must not be decomposed into shifts below.
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);

  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;

  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }

  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    // The log may be wider/narrower than the shift-amount type; adjust it.
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }

  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
  // mul x, (2^N + 1) --> add (shl x, N), x
  // mul x, (2^N - 1) --> sub (shl x, N), x
  // Examples: x * 33 --> (x << 5) + x
  //           x * 15 --> (x << 4) - x
  //           x * -33 --> -((x << 5) + x)
  //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
    // TODO: We could handle more general decomposition of any constant by
    //       having the target set a limit on number of ops and making a
    //       callback to determine that sequence (similar to sqrt expansion).
    unsigned MathOp = ISD::DELETED_NODE;
    // Work on the magnitude; a trailing negate restores the sign below.
    APInt MulC = ConstValue1.abs();
    if ((MulC - 1).isPowerOf2())
      MathOp = ISD::ADD;
    else if ((MulC + 1).isPowerOf2())
      MathOp = ISD::SUB;

    if (MathOp != ISD::DELETED_NODE) {
      unsigned ShAmt =
          MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
      assert(ShAmt < VT.getScalarSizeInBits() &&
             "multiply-by-constant generated out of bounds shift");
      SDLoc DL(N);
      SDValue Shl =
          DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
      SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
      if (ConstValue1.isNegative())
        R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
      return R;
    }
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    // Only fold if the shift of the two constants folded to a constant.
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
    return RMUL;

  return SDValue();
}
3594 
3595 /// Return true if divmod libcall is available.
3596 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3597                                      const TargetLowering &TLI) {
3598   RTLIB::Libcall LC;
3599   EVT NodeType = Node->getValueType(0);
3600   if (!NodeType.isSimple())
3601     return false;
3602   switch (NodeType.getSimpleVT().SimpleTy) {
3603   default: return false; // No libcall for vector types.
3604   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
3605   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3606   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3607   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3608   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3609   }
3610 
3611   return TLI.getLibcallName(LC) != nullptr;
3612 }
3613 
/// Issue divrem if both quotient and remainder are needed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  // Scan every user of the dividend for a div/rem/divrem with the same
  // operands, and fold all of them into a single DIVREM node.
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Skip this node itself, already-deleted nodes, and dead users.
    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
        User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        if (UserOpc == OtherOpcode) {
          // First matching counterpart found: create the combined node.
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          // A DIVREM for this operand pair already exists; reuse it.
          combined = SDValue(User, 0);
        } else {
          // Same opcode as Node; it will be replaced via the return value.
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Result 0 of the DIVREM is the quotient, result 1 the remainder.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
3684 
3685 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3686   SDValue N0 = N->getOperand(0);
3687   SDValue N1 = N->getOperand(1);
3688   EVT VT = N->getValueType(0);
3689   SDLoc DL(N);
3690 
3691   unsigned Opc = N->getOpcode();
3692   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3693   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3694 
3695   // X / undef -> undef
3696   // X % undef -> undef
3697   // X / 0 -> undef
3698   // X % 0 -> undef
3699   // NOTE: This includes vectors where any divisor element is zero/undef.
3700   if (DAG.isUndef(Opc, {N0, N1}))
3701     return DAG.getUNDEF(VT);
3702 
3703   // undef / X -> 0
3704   // undef % X -> 0
3705   if (N0.isUndef())
3706     return DAG.getConstant(0, DL, VT);
3707 
3708   // 0 / X -> 0
3709   // 0 % X -> 0
3710   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3711   if (N0C && N0C->isNullValue())
3712     return N0;
3713 
3714   // X / X -> 1
3715   // X % X -> 0
3716   if (N0 == N1)
3717     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3718 
3719   // X / 1 -> X
3720   // X % 1 -> 0
3721   // If this is a boolean op (single-bit element type), we can't have
3722   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3723   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3724   // it's a 1.
3725   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3726     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3727 
3728   return SDValue();
3729 }
3730 
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  // Combine a signed integer divide.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
    return C;

  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);

  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
  // (only MIN_SIGNED / MIN_SIGNED is 1; every other quotient is 0).
  if (N1C && N1C->getAPIntValue().isMinSignedValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  if (SDValue V = visitSDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
3794 
/// Try to lower an sdiv whose divisor is a (possibly negated) power-of-2
/// constant, or fall back to the generic division-by-constant expansion.
/// Returns the replacement value, or a null SDValue if no combine applies.
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);
  unsigned BitWidth = VT.getScalarSizeInBits();

  // Helper for determining whether a value is a power-2 constant scalar or a
  // vector of such elements. Accepts negated powers of two as well; rejects
  // zero and opaque constants.
  auto IsPowerOfTwo = [](ConstantSDNode *C) {
    if (C->isNullValue() || C->isOpaque())
      return false;
    if (C->getAPIntValue().isPowerOf2())
      return true;
    if ((-C->getAPIntValue()).isPowerOf2())
      return true;
    return false;
  };

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // Create constants that are functions of the shift amount value.
    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
    // The CTTZ of the constant divisor must itself have folded to a constant
    // for this expansion to be worthwhile; bail out otherwise.
    if (!isConstantOrConstantVector(Inexact))
      return SDValue();

    // Splat the sign bit into the register
    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
    AddToWorklist(Sign.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    // This biases the dividend so the arithmetic shift rounds toward zero.
    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
    AddToWorklist(Srl.getNode());
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
    AddToWorklist(Add.getNode());
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
    AddToWorklist(Sra.getNode());

    // Special case: (sdiv X, 1) -> X
    // Special Case: (sdiv X, -1) -> 0-X
    // Handled via selects so a vector divisor may mix +/-1 lanes with other
    // power-of-2 lanes.
    SDValue One = DAG.getConstant(1, DL, VT);
    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
    SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
    SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
    Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);

    // If dividing by a positive value, we're done. Otherwise, the result must
    // be negated.
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);

    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
    SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
    return Res;
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  return SDValue();
}
3875 
/// Combine an ISD::UDIV node: constant folding, trivial-divisor folds, the
/// division-by-constant expansions in visitUDIVLike, and UDIVREM formation.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
    return C;

  // fold (udiv X, -1) -> select(X == -1, 1, 0)
  // Unsigned division by the maximum value yields 1 only when X equals it.
  if (N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (SDValue V = visitUDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
3930 
/// Try to lower a udiv by a constant: power-of-2 divisors become logical
/// shifts, other constants go through the generic BuildUDIV expansion.
/// Returns the replacement value, or a null SDValue if no combine applies.
SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      // Perform the addition in the type of the original shift amount.
      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  // Only expand when real division is expensive for this function; targets
  // may consult size/speed attributes.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  return SDValue();
}
3973 
// handles ISD::SREM and ISD::UREM
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
    return C;

  // fold (urem X, -1) -> select(X == -1, 0, x)
  // Unsigned remainder by the maximum value is X itself unless X equals it.
  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(0, DL, VT), N0);

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
  // speculative DIV must not cause a DIVREM conversion.  We guard against this
  // by skipping the simplification if isIntDivCheap().  When div is not cheap,
  // combine will not return a DIVREM.  Regardless, checking cheapness here
  // makes sense since the simplification results in fatter code.
  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
    SDValue OptimizedDiv =
        isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
    if (OptimizedDiv.getNode()) {
      // If the equivalent Div node also exists, update its users.
      unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
      if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
                                                { N0, N1 }))
        CombineTo(DivNode, OptimizedDiv);
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(OptimizedDiv.getNode());
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem (and likewise udiv, urem -> udivrem).
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}
4055 
/// Combine an ISD::MULHS (signed multiply-high) node.
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhs x, 0) -> 0
    // do not return N0/N1, because undef node may exist.
    if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
        ISD::isBuildVectorAllZeros(N1.getNode()))
      return DAG.getConstant(0, DL, VT);
  }

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  // The high half of x*1 is just the sign-extension of x.
  if (isOneConstant(N1))
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (mulhs x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
4102 
/// Combine an ISD::MULHU (unsigned multiply-high) node.
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhu x, 0) -> 0
    // do not return N0/N1, because undef node may exist.
    if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
        ISD::isBuildVectorAllZeros(N1.getNode()))
      return DAG.getConstant(0, DL, VT);
  }

  // fold (mulhu x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhu x, 1) -> 0
  if (isOneConstant(N1))
    return DAG.getConstant(0, DL, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
    unsigned NumEltBits = VT.getScalarSizeInBits();
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    SDValue SRLAmt = DAG.getNode(
        ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
4158 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Returns
/// the simplified value (via CombineTo), or a null SDValue if no
/// simplification was made.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    // Only use the simplified node if it is genuinely different and legal.
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
4207 
/// Combine an ISD::SMUL_LOHI node (signed multiply producing low and high
/// halves as two results).
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the mulhu to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result value 1) as the wide product shifted
      // right by the narrow bit width.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result value 0) as the truncated wide product.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
4238 
/// Combine an ISD::UMUL_LOHI node (unsigned multiply producing low and high
/// halves as two results).
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // (umul_lohi N0, 0) -> (0, 0)
  if (isNullConstant(N->getOperand(1))) {
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return CombineTo(N, Zero, Zero);
  }

  // (umul_lohi N0, 1) -> (N0, 0)
  if (isOneConstant(N->getOperand(1))) {
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return CombineTo(N, N->getOperand(0), Zero);
  }

  // If the type twice as wide is legal, transform the mulhu to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result value 1) as the wide product shifted
      // right by the narrow bit width.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result value 0) as the truncated wide product.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
4281 
4282 SDValue DAGCombiner::visitMULO(SDNode *N) {
4283   SDValue N0 = N->getOperand(0);
4284   SDValue N1 = N->getOperand(1);
4285   EVT VT = N0.getValueType();
4286   bool IsSigned = (ISD::SMULO == N->getOpcode());
4287 
4288   EVT CarryVT = N->getValueType(1);
4289   SDLoc DL(N);
4290 
4291   // canonicalize constant to RHS.
4292   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4293       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4294     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4295 
4296   // fold (mulo x, 0) -> 0 + no carry out
4297   if (isNullOrNullSplat(N1))
4298     return CombineTo(N, DAG.getConstant(0, DL, VT),
4299                      DAG.getConstant(0, DL, CarryVT));
4300 
4301   // (mulo x, 2) -> (addo x, x)
4302   if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
4303     if (C2->getAPIntValue() == 2)
4304       return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4305                          N->getVTList(), N0, N0);
4306 
4307   return SDValue();
4308 }
4309 
4310 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4311   SDValue N0 = N->getOperand(0);
4312   SDValue N1 = N->getOperand(1);
4313   EVT VT = N0.getValueType();
4314   unsigned Opcode = N->getOpcode();
4315 
4316   // fold vector ops
4317   if (VT.isVector())
4318     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4319       return FoldedVOp;
4320 
4321   // fold operation with constant operands.
4322   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4323     return C;
4324 
4325   // canonicalize constant to RHS
4326   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4327       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4328     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4329 
4330   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4331   // Only do this if the current op isn't legal and the flipped is.
4332   if (!TLI.isOperationLegal(Opcode, VT) &&
4333       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4334       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4335     unsigned AltOpcode;
4336     switch (Opcode) {
4337     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4338     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4339     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4340     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4341     default: llvm_unreachable("Unknown MINMAX opcode");
4342     }
4343     if (TLI.isOperationLegal(AltOpcode, VT))
4344       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4345   }
4346 
4347   return SDValue();
4348 }
4349 
/// If this is a bitwise logic instruction and both operands have the same
/// opcode, try to sink the other opcode after the logic instruction.
SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned LogicOpcode = N->getOpcode();
  unsigned HandOpcode = N0.getOpcode();
  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
          LogicOpcode == ISD::XOR) && "Expected logic opcode");
  assert(HandOpcode == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNumOperands() == 0)
    return SDValue();

  // FIXME: We should check number of uses of the operands to not increase
  //        the instruction count for all transforms.

  // Handle size-changing casts.
  SDValue X = N0.getOperand(0);
  SDValue Y = N1.getOperand(0);
  EVT XVT = X.getValueType();
  SDLoc DL(N);
  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
      HandOpcode == ISD::SIGN_EXTEND) {
    // If both operands have other uses, this transform would create extra
    // instructions without eliminating anything.
    if (!N0.hasOneUse() && !N1.hasOneUse())
      return SDValue();
    // We need matching integer source types.
    if (XVT != Y.getValueType())
      return SDValue();
    // Don't create an illegal op during or after legalization. Don't ever
    // create an unsupported vector op.
    if ((VT.isVector() || LegalOperations) &&
        !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
      return SDValue();
    // Avoid infinite looping with PromoteIntBinOp.
    // TODO: Should we apply desirable/legal constraints to all opcodes?
    if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
        !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
      return SDValue();
    // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
  if (HandOpcode == ISD::TRUNCATE) {
    // If both operands have other uses, this transform would create extra
    // instructions without eliminating anything.
    if (!N0.hasOneUse() && !N1.hasOneUse())
      return SDValue();
    // We need matching source types.
    if (XVT != Y.getValueType())
      return SDValue();
    // Don't create an illegal op during or after legalization.
    if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
      return SDValue();
    // Be extra careful sinking truncate. If it's free, there's no benefit in
    // widening a binop. Also, don't create a logic op on an illegal type.
    if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
      return SDValue();
    if (!TLI.isTypeLegal(XVT))
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // For binops SHL/SRL/SRA/AND:
  //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
  // Requires both hands to share the same second operand z.
  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
       HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    // If either operand has other uses, this transform is not an improvement.
    if (!N0.hasOneUse() || !N1.hasOneUse())
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
  }

  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
  if (HandOpcode == ISD::BSWAP) {
    // If either operand has other uses, this transform is not an improvement.
    if (!N0.hasOneUse() || !N1.hasOneUse())
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
    // Input types must be integer and the same.
    if (XVT.isInteger() && XVT == Y.getValueType() &&
        !(VT.isVector() && TLI.isTypeLegal(VT) &&
          !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
      return DAG.getNode(HandOpcode, DL, VT, Logic);
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
    auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
    assert(X.getValueType() == Y.getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
        !SVN0->getMask().equals(SVN1->getMask()))
      return SDValue();

    // Don't try to fold this node if it requires introducing a
    // build vector of all zeros that might be illegal at this stage.
    SDValue ShOp = N0.getOperand(1);
    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

    // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
    if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
                                  N0.getOperand(0), N1.getOperand(0));
      return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
    }

    // Don't try to fold this node if it requires introducing a
    // build vector of all zeros that might be illegal at this stage.
    ShOp = N0.getOperand(0);
    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

    // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
    if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
                                  N1.getOperand(1));
      return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
    }
  }

  return SDValue();
}
4513 
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  // Both operands must be setcc-equivalent nodes; extract their compare
  // operands and condition codes.
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT.getScalarType() != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Case: both compares use the same predicate against the same RHS value.
  // Merge the two LHS values with one or/and and do a single compare.
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullOrNullSplat(LR);
    bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  // The scalar-size > 1 check ensures the constants 1 and 2 below are
  // representable without wrapping.
  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
      IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }

    // Turn compare of constants whose difference is 1 bit into add+and+setcc.
    // TODO - support non-uniform vector amounts.
    if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
      // Match a shared variable operand and 2 non-opaque constant operands.
      ConstantSDNode *C0 = isConstOrConstSplat(LR);
      ConstantSDNode *C1 = isConstOrConstSplat(RR);
      if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
        // Canonicalize larger constant as C0.
        if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
          std::swap(C0, C1);

        // The difference of the constants must be a single bit.
        const APInt &C0Val = C0->getAPIntValue();
        const APInt &C1Val = C1->getAPIntValue();
        if ((C0Val - C1Val).isPowerOf2()) {
          // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
          // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
          SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
          SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
          SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
          SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
          SDValue Zero = DAG.getConstant(0, DL, OpVT);
          return DAG.getSetCC(DL, VT, And, Zero, CC0);
        }
      }
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
                                : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
4660 
4661 /// This contains all DAGCombine rules which reduce two values combined by
4662 /// an And operation to a single value. This makes them reusable in the context
4663 /// of visitSELECT(). Rules involving constants are not included as
4664 /// visitSELECT() already handles those cases.
4665 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4666   EVT VT = N1.getValueType();
4667   SDLoc DL(N);
4668 
4669   // fold (and x, undef) -> 0
4670   if (N0.isUndef() || N1.isUndef())
4671     return DAG.getConstant(0, DL, VT);
4672 
4673   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4674     return V;
4675 
4676   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4677       VT.getSizeInBits() <= 64) {
4678     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4679       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4680         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4681         // immediate for an add, but it is legal if its top c2 bits are set,
4682         // transform the ADD so the immediate doesn't need to be materialized
4683         // in a register.
4684         APInt ADDC = ADDI->getAPIntValue();
4685         APInt SRLC = SRLI->getAPIntValue();
4686         if (ADDC.getMinSignedBits() <= 64 &&
4687             SRLC.ult(VT.getSizeInBits()) &&
4688             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4689           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4690                                              SRLC.getZExtValue());
4691           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4692             ADDC |= Mask;
4693             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4694               SDLoc DL0(N0);
4695               SDValue NewAdd =
4696                 DAG.getNode(ISD::ADD, DL0, VT,
4697                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4698               CombineTo(N0.getNode(), NewAdd);
4699               // Return N so it doesn't get rechecked!
4700               return SDValue(N, 0);
4701             }
4702           }
4703         }
4704       }
4705     }
4706   }
4707 
4708   // Reduce bit extract of low half of an integer to the narrower type.
4709   // (and (srl i64:x, K), KMask) ->
4710   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4711   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4712     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4713       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4714         unsigned Size = VT.getSizeInBits();
4715         const APInt &AndMask = CAnd->getAPIntValue();
4716         unsigned ShiftBits = CShift->getZExtValue();
4717 
4718         // Bail out, this node will probably disappear anyway.
4719         if (ShiftBits == 0)
4720           return SDValue();
4721 
4722         unsigned MaskBits = AndMask.countTrailingOnes();
4723         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4724 
4725         if (AndMask.isMask() &&
4726             // Required bits must not span the two halves of the integer and
4727             // must fit in the half size type.
4728             (ShiftBits + MaskBits <= Size / 2) &&
4729             TLI.isNarrowingProfitable(VT, HalfVT) &&
4730             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4731             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4732             TLI.isTruncateFree(VT, HalfVT) &&
4733             TLI.isZExtFree(HalfVT, VT)) {
4734           // The isNarrowingProfitable is to avoid regressions on PPC and
4735           // AArch64 which match a few 64-bit bit insert / bit extract patterns
4736           // on downstream users of this. Those patterns could probably be
4737           // extended to handle extensions mixed in.
4738 
4739           SDValue SL(N0);
4740           assert(MaskBits <= Size);
4741 
4742           // Extracting the highest bit of the low half.
4743           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4744           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4745                                       N0.getOperand(0));
4746 
4747           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4748           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4749           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4750           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4751           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4752         }
4753       }
4754     }
4755   }
4756 
4757   return SDValue();
4758 }
4759 
4760 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4761                                    EVT LoadResultTy, EVT &ExtVT) {
4762   if (!AndC->getAPIntValue().isMask())
4763     return false;
4764 
4765   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4766 
4767   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4768   EVT LoadedVT = LoadN->getMemoryVT();
4769 
4770   if (ExtVT == LoadedVT &&
4771       (!LegalOperations ||
4772        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4773     // ZEXTLOAD will match without needing to change the size of the value being
4774     // loaded.
4775     return true;
4776   }
4777 
4778   // Do not change the width of a volatile or atomic loads.
4779   if (!LoadN->isSimple())
4780     return false;
4781 
4782   // Do not generate loads of non-round integer types since these can
4783   // be expensive (and would be wrong if the type is not byte sized).
4784   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4785     return false;
4786 
4787   if (LegalOperations &&
4788       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4789     return false;
4790 
4791   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4792     return false;
4793 
4794   return true;
4795 }
4796 
4797 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4798                                     ISD::LoadExtType ExtType, EVT &MemVT,
4799                                     unsigned ShAmt) {
4800   if (!LDST)
4801     return false;
4802   // Only allow byte offsets.
4803   if (ShAmt % 8)
4804     return false;
4805 
4806   // Do not generate loads of non-round integer types since these can
4807   // be expensive (and would be wrong if the type is not byte sized).
4808   if (!MemVT.isRound())
4809     return false;
4810 
4811   // Don't change the width of a volatile or atomic loads.
4812   if (!LDST->isSimple())
4813     return false;
4814 
4815   // Verify that we are actually reducing a load width here.
4816   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4817     return false;
4818 
4819   // Ensure that this isn't going to produce an unsupported memory access.
4820   if (ShAmt &&
4821       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4822                               LDST->getAddressSpace(), ShAmt / 8,
4823                               LDST->getMemOperand()->getFlags()))
4824     return false;
4825 
4826   // It's not possible to generate a constant of extended or untyped type.
4827   EVT PtrType = LDST->getBasePtr().getValueType();
4828   if (PtrType == MVT::Untyped || PtrType.isExtended())
4829     return false;
4830 
4831   if (isa<LoadSDNode>(LDST)) {
4832     LoadSDNode *Load = cast<LoadSDNode>(LDST);
4833     // Don't transform one with multiple uses, this would require adding a new
4834     // load.
4835     if (!SDValue(Load, 0).hasOneUse())
4836       return false;
4837 
4838     if (LegalOperations &&
4839         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4840       return false;
4841 
4842     // For the transform to be legal, the load must produce only two values
4843     // (the value loaded and the chain).  Don't transform a pre-increment
4844     // load, for example, which produces an extra value.  Otherwise the
4845     // transformation is not equivalent, and the downstream logic to replace
4846     // uses gets things wrong.
4847     if (Load->getNumValues() > 2)
4848       return false;
4849 
4850     // If the load that we're shrinking is an extload and we're not just
4851     // discarding the extension we can't simply shrink the load. Bail.
4852     // TODO: It would be possible to merge the extensions in some cases.
4853     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4854         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4855       return false;
4856 
4857     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4858       return false;
4859   } else {
4860     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4861     StoreSDNode *Store = cast<StoreSDNode>(LDST);
4862     // Can't write outside the original store
4863     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4864       return false;
4865 
4866     if (LegalOperations &&
4867         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4868       return false;
4869   }
4870   return true;
4871 }
4872 
/// Recursively walk the operands of N (an and/or/xor tree) looking for loads
/// that can be narrowed to the width implied by the and-mask \p Mask.
/// Narrowable loads are collected in \p Loads; or/xor nodes whose constant
/// operands carry bits outside the mask are recorded in \p NodesWithConsts
/// for later re-masking; and at most one other leaf node (\p NodeToMask) is
/// permitted, which will receive an explicit AND. Returns false if any
/// operand makes the transform unsafe.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallVectorImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (SDValue Op : N->op_values()) {
    // Vector operands are not handled by this transform.
    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      // For or/xor, constant bits outside the mask would change the result,
      // so remember the parent node: its constant must be re-masked.
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    // Other users of this operand would observe the narrowing.
    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.push_back(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      // The effective zero-extended-from type: the asserted VT for
      // AssertZext, otherwise the source operand's type.
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      // Otherwise fall through (break, not continue) to the generic
      // "mask this node" handling below.
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      // Recurse into nested logic ops; they may hide more loads.
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will be masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        // Chain/glue results don't count as data results.
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}
4961 
/// If N is an AND of a logic-op tree and a contiguous low-bit mask constant,
/// try to push the mask backwards through the tree so the loads feeding it
/// can be narrowed (via ReduceLoadWidth). Returns true if the DAG changed.
bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask)
    return false;

  // Only contiguous low-bit masks (0...01...1) are handled.
  if (!Mask->getAPIntValue().isMask())
    return false;

  // No need to do anything if the and directly uses a load.
  if (isa<LoadSDNode>(N->getOperand(0)))
    return false;

  SmallVector<LoadSDNode*, 8> Loads;
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
  SDNode *FixupNode = nullptr;
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    // Without at least one narrowable load the transform has no benefit.
    if (Loads.size() == 0)
      return false;

    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);

    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      // The RAUW above also rewired the new AND's own operand; restore it to
      // point at the original node (skipped if getNode folded the AND away).
      if (And.getOpcode() == ISD ::AND)
        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
    }

    // Narrow any constants that need it.
    for (auto *LogicN : NodesWithConsts) {
      SDValue Op0 = LogicN->getOperand(0);
      SDValue Op1 = LogicN->getOperand(1);

      // Canonicalize so the constant operand is Op1.
      if (isa<ConstantSDNode>(Op0))
          std::swap(Op0, Op1);

      // Mask the constant down to the bits the root AND keeps.
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
                                Op1, MaskOp);

      DAG.UpdateNodeOperands(LogicN, Op0, And);
    }

    // Create narrow loads.
    for (auto *Load : Loads) {
      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      // As above, undo the self-referential operand created by the RAUW.
      if (And.getOpcode() == ISD ::AND)
        And = SDValue(
            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
      // Shrink the load itself; SearchForAndLoads already proved this is
      // legal, hence the assert below.
      SDValue NewLoad = ReduceLoadWidth(And.getNode());
      assert(NewLoad &&
             "Shouldn't be masking the load if it can't be narrowed");
      CombineTo(Load, NewLoad, NewLoad.getValue(1));
    }
    // The root AND is now redundant: every path into it is already masked.
    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    return true;
  }
  return false;
}
5029 
5030 // Unfold
5031 //    x &  (-1 'logical shift' y)
5032 // To
5033 //    (x 'opposite logical shift' y) 'logical shift' y
5034 // if it is better for performance.
5035 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5036   assert(N->getOpcode() == ISD::AND);
5037 
5038   SDValue N0 = N->getOperand(0);
5039   SDValue N1 = N->getOperand(1);
5040 
5041   // Do we actually prefer shifts over mask?
5042   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5043     return SDValue();
5044 
5045   // Try to match  (-1 '[outer] logical shift' y)
5046   unsigned OuterShift;
5047   unsigned InnerShift; // The opposite direction to the OuterShift.
5048   SDValue Y;           // Shift amount.
5049   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5050     if (!M.hasOneUse())
5051       return false;
5052     OuterShift = M->getOpcode();
5053     if (OuterShift == ISD::SHL)
5054       InnerShift = ISD::SRL;
5055     else if (OuterShift == ISD::SRL)
5056       InnerShift = ISD::SHL;
5057     else
5058       return false;
5059     if (!isAllOnesConstant(M->getOperand(0)))
5060       return false;
5061     Y = M->getOperand(1);
5062     return true;
5063   };
5064 
5065   SDValue X;
5066   if (matchMask(N1))
5067     X = N0;
5068   else if (matchMask(N0))
5069     X = N1;
5070   else
5071     return SDValue();
5072 
5073   SDLoc DL(N);
5074   EVT VT = N->getValueType(0);
5075 
5076   //     tmp = x   'opposite logical shift' y
5077   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5078   //     ret = tmp 'logical shift' y
5079   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5080 
5081   return T1;
5082 }
5083 
5084 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5085 /// For a target with a bit test, this is expected to become test + set and save
5086 /// at least 1 instruction.
5087 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5088   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5089 
5090   // This is probably not worthwhile without a supported type.
5091   EVT VT = And->getValueType(0);
5092   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5093   if (!TLI.isTypeLegal(VT))
5094     return SDValue();
5095 
5096   // Look through an optional extension and find a 'not'.
5097   // TODO: Should we favor test+set even without the 'not' op?
5098   SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5099   if (Not.getOpcode() == ISD::ANY_EXTEND)
5100     Not = Not.getOperand(0);
5101   if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5102     return SDValue();
5103 
5104   // Look though an optional truncation. The source operand may not be the same
5105   // type as the original 'and', but that is ok because we are masking off
5106   // everything but the low bit.
5107   SDValue Srl = Not.getOperand(0);
5108   if (Srl.getOpcode() == ISD::TRUNCATE)
5109     Srl = Srl.getOperand(0);
5110 
5111   // Match a shift-right by constant.
5112   if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5113       !isa<ConstantSDNode>(Srl.getOperand(1)))
5114     return SDValue();
5115 
5116   // We might have looked through casts that make this transform invalid.
5117   // TODO: If the source type is wider than the result type, do the mask and
5118   //       compare in the source type.
5119   const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5120   unsigned VTBitWidth = VT.getSizeInBits();
5121   if (ShiftAmt.uge(VTBitWidth))
5122     return SDValue();
5123 
5124   // Turn this into a bit-test pattern using mask op + setcc:
5125   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5126   SDLoc DL(And);
5127   SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5128   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5129   SDValue Mask = DAG.getConstant(
5130       APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5131   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5132   SDValue Zero = DAG.getConstant(0, DL, VT);
5133   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5134   return DAG.getZExtOrTrunc(Setcc, DL, VT);
5135 }
5136 
5137 SDValue DAGCombiner::visitAND(SDNode *N) {
5138   SDValue N0 = N->getOperand(0);
5139   SDValue N1 = N->getOperand(1);
5140   EVT VT = N1.getValueType();
5141 
5142   // x & x --> x
5143   if (N0 == N1)
5144     return N0;
5145 
5146   // fold vector ops
5147   if (VT.isVector()) {
5148     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5149       return FoldedVOp;
5150 
5151     // fold (and x, 0) -> 0, vector edition
5152     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5153       // do not return N0, because undef node may exist in N0
5154       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5155                              SDLoc(N), N0.getValueType());
5156     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5157       // do not return N1, because undef node may exist in N1
5158       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5159                              SDLoc(N), N1.getValueType());
5160 
5161     // fold (and x, -1) -> x, vector edition
5162     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5163       return N1;
5164     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5165       return N0;
5166   }
5167 
5168   // fold (and c1, c2) -> c1&c2
5169   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5170   if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5171     return C;
5172 
5173   // canonicalize constant to RHS
5174   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5175       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5176     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5177 
5178   // fold (and x, -1) -> x
5179   if (isAllOnesConstant(N1))
5180     return N0;
5181 
5182   // if (and x, c) is known to be zero, return 0
5183   unsigned BitWidth = VT.getScalarSizeInBits();
5184   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5185                                    APInt::getAllOnesValue(BitWidth)))
5186     return DAG.getConstant(0, SDLoc(N), VT);
5187 
5188   if (SDValue NewSel = foldBinOpIntoSelect(N))
5189     return NewSel;
5190 
5191   // reassociate and
5192   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5193     return RAND;
5194 
5195   // Try to convert a constant mask AND into a shuffle clear mask.
5196   if (VT.isVector())
5197     if (SDValue Shuffle = XformToShuffleWithZero(N))
5198       return Shuffle;
5199 
5200   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5201     return Combined;
5202 
5203   // fold (and (or x, C), D) -> D if (C & D) == D
5204   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5205     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5206   };
5207   if (N0.getOpcode() == ISD::OR &&
5208       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5209     return N1;
5210   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5211   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5212     SDValue N0Op0 = N0.getOperand(0);
5213     APInt Mask = ~N1C->getAPIntValue();
5214     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5215     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5216       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5217                                  N0.getValueType(), N0Op0);
5218 
5219       // Replace uses of the AND with uses of the Zero extend node.
5220       CombineTo(N, Zext);
5221 
5222       // We actually want to replace all uses of the any_extend with the
5223       // zero_extend, to avoid duplicating things.  This will later cause this
5224       // AND to be folded.
5225       CombineTo(N0.getNode(), Zext);
5226       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5227     }
5228   }
5229 
5230   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5231   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5232   // already be zero by virtue of the width of the base type of the load.
5233   //
5234   // the 'X' node here can either be nothing or an extract_vector_elt to catch
5235   // more cases.
5236   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5237        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5238        N0.getOperand(0).getOpcode() == ISD::LOAD &&
5239        N0.getOperand(0).getResNo() == 0) ||
5240       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5241     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5242                                          N0 : N0.getOperand(0) );
5243 
5244     // Get the constant (if applicable) the zero'th operand is being ANDed with.
5245     // This can be a pure constant or a vector splat, in which case we treat the
5246     // vector as a scalar and use the splat value.
5247     APInt Constant = APInt::getNullValue(1);
5248     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5249       Constant = C->getAPIntValue();
5250     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5251       APInt SplatValue, SplatUndef;
5252       unsigned SplatBitSize;
5253       bool HasAnyUndefs;
5254       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5255                                              SplatBitSize, HasAnyUndefs);
5256       if (IsSplat) {
5257         // Undef bits can contribute to a possible optimisation if set, so
5258         // set them.
5259         SplatValue |= SplatUndef;
5260 
5261         // The splat value may be something like "0x00FFFFFF", which means 0 for
5262         // the first vector value and FF for the rest, repeating. We need a mask
5263         // that will apply equally to all members of the vector, so AND all the
5264         // lanes of the constant together.
5265         unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5266 
5267         // If the splat value has been compressed to a bitlength lower
5268         // than the size of the vector lane, we need to re-expand it to
5269         // the lane size.
5270         if (EltBitWidth > SplatBitSize)
5271           for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5272                SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5273             SplatValue |= SplatValue.shl(SplatBitSize);
5274 
5275         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5276         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
5277         if ((SplatBitSize % EltBitWidth) == 0) {
5278           Constant = APInt::getAllOnesValue(EltBitWidth);
5279           for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5280             Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5281         }
5282       }
5283     }
5284 
5285     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5286     // actually legal and isn't going to get expanded, else this is a false
5287     // optimisation.
5288     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5289                                                     Load->getValueType(0),
5290                                                     Load->getMemoryVT());
5291 
5292     // Resize the constant to the same size as the original memory access before
5293     // extension. If it is still the AllOnesValue then this AND is completely
5294     // unneeded.
5295     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5296 
5297     bool B;
5298     switch (Load->getExtensionType()) {
5299     default: B = false; break;
5300     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5301     case ISD::ZEXTLOAD:
5302     case ISD::NON_EXTLOAD: B = true; break;
5303     }
5304 
5305     if (B && Constant.isAllOnesValue()) {
5306       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5307       // preserve semantics once we get rid of the AND.
5308       SDValue NewLoad(Load, 0);
5309 
5310       // Fold the AND away. NewLoad may get replaced immediately.
5311       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5312 
5313       if (Load->getExtensionType() == ISD::EXTLOAD) {
5314         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5315                               Load->getValueType(0), SDLoc(Load),
5316                               Load->getChain(), Load->getBasePtr(),
5317                               Load->getOffset(), Load->getMemoryVT(),
5318                               Load->getMemOperand());
5319         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5320         if (Load->getNumValues() == 3) {
5321           // PRE/POST_INC loads have 3 values.
5322           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5323                            NewLoad.getValue(2) };
5324           CombineTo(Load, To, 3, true);
5325         } else {
5326           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5327         }
5328       }
5329 
5330       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5331     }
5332   }
5333 
5334   // fold (and (load x), 255) -> (zextload x, i8)
5335   // fold (and (extload x, i16), 255) -> (zextload x, i8)
5336   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5337   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5338                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
5339                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5340     if (SDValue Res = ReduceLoadWidth(N)) {
5341       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5342         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5343       AddToWorklist(N);
5344       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5345       return SDValue(N, 0);
5346     }
5347   }
5348 
5349   if (LegalTypes) {
5350     // Attempt to propagate the AND back up to the leaves which, if they're
5351     // loads, can be combined to narrow loads and the AND node can be removed.
5352     // Perform after legalization so that extend nodes will already be
5353     // combined into the loads.
5354     if (BackwardsPropagateMask(N))
5355       return SDValue(N, 0);
5356   }
5357 
5358   if (SDValue Combined = visitANDLike(N0, N1, N))
5359     return Combined;
5360 
5361   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
5362   if (N0.getOpcode() == N1.getOpcode())
5363     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5364       return V;
5365 
5366   // Masking the negated extension of a boolean is just the zero-extended
5367   // boolean:
5368   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5369   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5370   //
5371   // Note: the SimplifyDemandedBits fold below can make an information-losing
5372   // transform, and then we have no way to find this better fold.
5373   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5374     if (isNullOrNullSplat(N0.getOperand(0))) {
5375       SDValue SubRHS = N0.getOperand(1);
5376       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5377           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5378         return SubRHS;
5379       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5380           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5381         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5382     }
5383   }
5384 
5385   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5386   // fold (and (sra)) -> (and (srl)) when possible.
5387   if (SimplifyDemandedBits(SDValue(N, 0)))
5388     return SDValue(N, 0);
5389 
5390   // fold (zext_inreg (extload x)) -> (zextload x)
5391   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5392   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5393       (ISD::isEXTLoad(N0.getNode()) ||
5394        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5395     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5396     EVT MemVT = LN0->getMemoryVT();
5397     // If we zero all the possible extended bits, then we can turn this into
5398     // a zextload if we are running before legalize or the operation is legal.
5399     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5400     unsigned MemBitSize = MemVT.getScalarSizeInBits();
5401     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5402     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5403         ((!LegalOperations && LN0->isSimple()) ||
5404          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5405       SDValue ExtLoad =
5406           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5407                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5408       AddToWorklist(N);
5409       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5410       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5411     }
5412   }
5413 
5414   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5415   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5416     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5417                                            N0.getOperand(1), false))
5418       return BSwap;
5419   }
5420 
5421   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5422     return Shifts;
5423 
5424   if (TLI.hasBitTest(N0, N1))
5425     if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5426       return V;
5427 
5428   return SDValue();
5429 }
5430 
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
/// \param N  the node being combined; supplies the result type and location.
/// \param N0 one operand of the OR.
/// \param N1 the other operand of the OR.
/// \param DemandHighBits if true, all bits above the low 16 of the result
///        must be proven zero for the transform to be sound.
/// \returns the replacement value, or an empty SDValue if no match.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  // Only run after legalization so the BSWAP legality query is meaningful.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  // First canonicalize so an AND-of-SRL lands in N1 and an AND-of-SHL in N0,
  // then peel the masking ANDs off each side, remembering via the
  // LookPassAnd flags that the high/low bytes are already masked.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
      std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
      std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    // This is needed for X86.
    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
                  N01C->getZExtValue() != 0xFFFF))
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // Canonicalize the shifts: SHL in N0, SRL in N1.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  // Both shift amounts must be the constant 8.
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  // i.e. the masking AND may also sit below the shift instead of above it.
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    // Also allow 0xFFFF since the bits will be shifted out. This is needed
    // for X86.
    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
                   N101C->getZExtValue() != 0xFFFF))
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must swap bytes of the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Build (bswap a); for types wider than 16 bits, shift the swapped
  // halfword back down into the low bits.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
5543 
5544 /// Return true if the specified node is an element that makes up a 32-bit
5545 /// packed halfword byteswap.
5546 /// ((x & 0x000000ff) << 8) |
5547 /// ((x & 0x0000ff00) >> 8) |
5548 /// ((x & 0x00ff0000) << 8) |
5549 /// ((x & 0xff000000) >> 8)
5550 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5551   if (!N.getNode()->hasOneUse())
5552     return false;
5553 
5554   unsigned Opc = N.getOpcode();
5555   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5556     return false;
5557 
5558   SDValue N0 = N.getOperand(0);
5559   unsigned Opc0 = N0.getOpcode();
5560   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5561     return false;
5562 
5563   ConstantSDNode *N1C = nullptr;
5564   // SHL or SRL: look upstream for AND mask operand
5565   if (Opc == ISD::AND)
5566     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5567   else if (Opc0 == ISD::AND)
5568     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5569   if (!N1C)
5570     return false;
5571 
5572   unsigned MaskByteOffset;
5573   switch (N1C->getZExtValue()) {
5574   default:
5575     return false;
5576   case 0xFF:       MaskByteOffset = 0; break;
5577   case 0xFF00:     MaskByteOffset = 1; break;
5578   case 0xFFFF:
5579     // In case demanded bits didn't clear the bits that will be shifted out.
5580     // This is needed for X86.
5581     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5582       MaskByteOffset = 1;
5583       break;
5584     }
5585     return false;
5586   case 0xFF0000:   MaskByteOffset = 2; break;
5587   case 0xFF000000: MaskByteOffset = 3; break;
5588   }
5589 
5590   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5591   if (Opc == ISD::AND) {
5592     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5593       // (x >> 8) & 0xff
5594       // (x >> 8) & 0xff0000
5595       if (Opc0 != ISD::SRL)
5596         return false;
5597       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5598       if (!C || C->getZExtValue() != 8)
5599         return false;
5600     } else {
5601       // (x << 8) & 0xff00
5602       // (x << 8) & 0xff000000
5603       if (Opc0 != ISD::SHL)
5604         return false;
5605       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5606       if (!C || C->getZExtValue() != 8)
5607         return false;
5608     }
5609   } else if (Opc == ISD::SHL) {
5610     // (x & 0xff) << 8
5611     // (x & 0xff0000) << 8
5612     if (MaskByteOffset != 0 && MaskByteOffset != 2)
5613       return false;
5614     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5615     if (!C || C->getZExtValue() != 8)
5616       return false;
5617   } else { // Opc == ISD::SRL
5618     // (x & 0xff00) >> 8
5619     // (x & 0xff000000) >> 8
5620     if (MaskByteOffset != 1 && MaskByteOffset != 3)
5621       return false;
5622     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5623     if (!C || C->getZExtValue() != 8)
5624       return false;
5625   }
5626 
5627   if (Parts[MaskByteOffset])
5628     return false;
5629 
5630   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5631   return true;
5632 }
5633 
5634 // Match 2 elements of a packed halfword bswap.
5635 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
5636   if (N.getOpcode() == ISD::OR)
5637     return isBSwapHWordElement(N.getOperand(0), Parts) &&
5638            isBSwapHWordElement(N.getOperand(1), Parts);
5639 
5640   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
5641     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
5642     if (!C || C->getAPIntValue() != 16)
5643       return false;
5644     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
5645     return true;
5646   }
5647 
5648   return false;
5649 }
5650 
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  // Only run after legalization so the BSWAP legality query is meaningful.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (bswaphpair), (bswaphpair))
  // (or (or (bswaphpair), (and)), (and))
  // (or (or (and), (bswaphpair)), (and))
  // Parts[i] receives the node supplying byte i. The matchers fill it in as
  // a side effect (and refuse to claim a byte twice), so the order of the
  // short-circuited calls below is semantically significant.
  SDNode *Parts[4] = {};

  if (isBSwapHWordPair(N0, Parts)) {
    // (or (or (and), (and)), (or (and), (and)))
    if (!isBSwapHWordPair(N1, Parts))
      return SDValue();
  } else if (N0.getOpcode() == ISD::OR) {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    // Try the remaining element/pair split in both orders.
    if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
        !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
      return SDValue();
  } else
    return SDValue();

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do  (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}
5708 
5709 /// This contains all DAGCombine rules which reduce two values combined by
5710 /// an Or operation to a single value \see visitANDLike().
5711 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5712   EVT VT = N1.getValueType();
5713   SDLoc DL(N);
5714 
5715   // fold (or x, undef) -> -1
5716   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5717     return DAG.getAllOnesConstant(DL, VT);
5718 
5719   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5720     return V;
5721 
5722   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
5723   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5724       // Don't increase # computations.
5725       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5726     // We can only do this xform if we know that bits from X that are set in C2
5727     // but not in C1 are already zero.  Likewise for Y.
5728     if (const ConstantSDNode *N0O1C =
5729         getAsNonOpaqueConstant(N0.getOperand(1))) {
5730       if (const ConstantSDNode *N1O1C =
5731           getAsNonOpaqueConstant(N1.getOperand(1))) {
5732         // We can only do this xform if we know that bits from X that are set in
5733         // C2 but not in C1 are already zero.  Likewise for Y.
5734         const APInt &LHSMask = N0O1C->getAPIntValue();
5735         const APInt &RHSMask = N1O1C->getAPIntValue();
5736 
5737         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5738             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5739           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5740                                   N0.getOperand(0), N1.getOperand(0));
5741           return DAG.getNode(ISD::AND, DL, VT, X,
5742                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
5743         }
5744       }
5745     }
5746   }
5747 
5748   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5749   if (N0.getOpcode() == ISD::AND &&
5750       N1.getOpcode() == ISD::AND &&
5751       N0.getOperand(0) == N1.getOperand(0) &&
5752       // Don't increase # computations.
5753       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5754     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5755                             N0.getOperand(1), N1.getOperand(1));
5756     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5757   }
5758 
5759   return SDValue();
5760 }
5761 
5762 /// OR combines for which the commuted variant will be tried as well.
5763 static SDValue visitORCommutative(
5764     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5765   EVT VT = N0.getValueType();
5766   if (N0.getOpcode() == ISD::AND) {
5767     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
5768     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
5769       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5770 
5771     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5772     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
5773       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
5774   }
5775 
5776   return SDValue();
5777 }
5778 
/// Combiner entry point for ISD::OR nodes. Tries a sequence of folds in
/// priority order; the first successful fold returns immediately, so the
/// ordering of the attempts below is significant. Returns an empty SDValue
/// if nothing matched.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input (exactly one per shuffle).
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        // Build a combined mask where each lane takes its value from
        // whichever shuffle contributes the non-zero element.
        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the otherside is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          // The non-zero inputs become the two operands of the new shuffle.
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          SDValue LegalShuffle =
              TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
                                          Mask, DAG);
          if (LegalShuffle)
            return LegalShuffle;
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  // N1C is also consumed by the masked-value fold further down.
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);

  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0 or c1/c2 are undef.
  auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
    return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
    if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
                                                 {N1, N0.getOperand(1)})) {
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
      AddToWorklist(IOR.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    }
  }

  // Try the non-symmetric folds with the operands in both orders.
  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
    return Combined;
  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
    return Combined;

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // See if this is some rotate idiom.
  if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
    return Rot;

  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If OR can be rewritten into ADD, try combines based on ADD.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    if (SDValue Combined = visitADDLike(N))
      return Combined;

  return SDValue();
}
5956 
5957 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5958   if (Op.getOpcode() == ISD::AND &&
5959       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5960     Mask = Op.getOperand(1);
5961     return Op.getOperand(0);
5962   }
5963   return Op;
5964 }
5965 
5966 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5967 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5968                             SDValue &Mask) {
5969   Op = stripConstantMask(DAG, Op, Mask);
5970   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5971     Shift = Op;
5972     return true;
5973   }
5974   return false;
5975 }
5976 
5977 /// Helper function for visitOR to extract the needed side of a rotate idiom
5978 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
5979 /// InstCombine merged some outside op with one of the shifts from
5980 /// the rotate pattern.
5981 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5982 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
5983 /// patterns:
5984 ///
5985 ///   (or (add v v) (shrl v bitwidth-1)):
5986 ///     expands (add v v) -> (shl v 1)
5987 ///
5988 ///   (or (mul v c0) (shrl (mul v c1) c2)):
5989 ///     expands (mul v c0) -> (shl (mul v c1) c3)
5990 ///
5991 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
5992 ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
5993 ///
5994 ///   (or (shl v c0) (shrl (shl v c1) c2)):
5995 ///     expands (shl v c0) -> (shl (shl v c1) c3)
5996 ///
5997 ///   (or (shrl v c0) (shl (shrl v c1) c2)):
5998 ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
5999 ///
6000 /// Such that in all cases, c3+c2==bitwidth(op v c1).
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
                                     SDValue ExtractFrom, SDValue &Mask,
                                     const SDLoc &DL) {
  assert(OppShift && ExtractFrom && "Empty SDValue");
  assert(
      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
      "Existing shift must be valid as a rotate half");

  // Strip a constant AND mask, if present; the caller re-applies it via \p
  // Mask after the rotate has been formed.
  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);

  // Value and Type of the shift.
  SDValue OppShiftLHS = OppShift.getOperand(0);
  EVT ShiftedVT = OppShiftLHS.getValueType();

  // Amount of the existing shift.
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));

  // (add v v) -> (shl v 1)
  // Handles the rotate-by-one case where the shl half was canonicalized into
  // an add of the value with itself: (or (add v v) (srl v bitwidth-1)).
  if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
      ExtractFrom.getOpcode() == ISD::ADD &&
      ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
      ExtractFrom.getOperand(0) == OppShiftLHS &&
      OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
    return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
                       DAG.getShiftAmountConstant(1, ShiftedVT, DL));

  // Preconditions:
  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
  //
  // Find opcode of the needed shift to be extracted from (op0 v c0).
  unsigned Opcode = ISD::DELETED_NODE;
  bool IsMulOrDiv = false;
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
  // opcode or its arithmetic (mul or udiv) variant.
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
      return false;
    Opcode = NeededShift;
    return true;
  };
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
  // that the needed shift can be extracted from. The existing SRL pairs with
  // an extracted SHL (or MUL), the existing SHL with an extracted SRL (or
  // UDIV).
  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    return SDValue();

  // op0 must be the same opcode on both sides, have the same LHS argument,
  // and produce the same value type.
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
      ShiftedVT != ExtractFrom.getValueType())
    return SDValue();

  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
  ConstantSDNode *ExtractFromCst =
      isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these values
  // Check that we have constant values (zero constants are rejected too, as
  // they cannot participate in a valid rotate pattern here).
  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    return SDValue();

  // Compute the shift amount we need to extract to complete the rotate.
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
  // Reject out-of-range amounts for the existing shift.
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    return SDValue();
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);

  // Now try extract the needed shift from the ExtractFrom op and see if the
  // result matches up with the existing shift's LHS op.
  if (IsMulOrDiv) {
    // Op to extract from is a mul or udiv by a constant.
    // Check:
    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
                                                 NeededShiftAmt.getZExtValue());
    APInt ResultAmt;
    APInt Rem;
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    if (Rem != 0 || ResultAmt != OppLHSAmt)
      return SDValue();
  } else {
    // Op to extract from is a shift by a constant.
    // Check:
    //      c2 - (bitwidth(op0 v c0) - c1) == c0
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
                                          ExtractFromAmt.getBitWidth()))
      return SDValue();
  }

  // Return the expanded shift op that should allow a rotate to be formed.
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
  EVT ResVT = ExtractFrom.getValueType();
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}
6106 
6107 // Return true if we can prove that, whenever Neg and Pos are both in the
6108 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
6109 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6110 //
6111 //     (or (shift1 X, Neg), (shift2 X, Pos))
6112 //
6113 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6114 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
6115 // to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
      unsigned Bits = Log2_64(EltSize);
      // The AND may be stripped when each of the low Bits bits of the operand
      // is either preserved by the mask or already known to be zero, i.e. the
      // AND does not change the low Bits bits.
      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  // The same known-bits reasoning as for Neg above applies here.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
           MaskLoBits))
        Pos = Pos.getOperand(0);
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}
6218 
6219 // A subroutine of MatchRotate used once we have found an OR of two opposite
6220 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
6221 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6222 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6223 // Neg with outer conversions stripped away.
6224 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6225                                        SDValue Neg, SDValue InnerPos,
6226                                        SDValue InnerNeg, unsigned PosOpcode,
6227                                        unsigned NegOpcode, const SDLoc &DL) {
6228   // fold (or (shl x, (*ext y)),
6229   //          (srl x, (*ext (sub 32, y)))) ->
6230   //   (rotl x, y) or (rotr x, (sub 32, y))
6231   //
6232   // fold (or (shl x, (*ext (sub 32, y))),
6233   //          (srl x, (*ext y))) ->
6234   //   (rotr x, y) or (rotl x, (sub 32, y))
6235   EVT VT = Shifted.getValueType();
6236   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
6237     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6238     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6239                        HasPos ? Pos : Neg);
6240   }
6241 
6242   return SDValue();
6243 }
6244 
6245 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
6246 // idioms for rotate, and if the target supports rotation instructions, generate
6247 // a rot[lr].
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  // The target must have at least one rotate flavor.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR)
    return SDValue();

  // Check for truncated rotate: try matching the rotate in the wider source
  // type and truncating the result back down.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
    }
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);

  // If neither side matched a rotate half, bail
  if (!LHSShift && !RHSShift)
    return SDValue();

  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract.  We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (ie if InstCombine merged two shl or srl ops into a
  // single one).

  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
  if (LHSShift)
    if (SDValue NewRHSShift =
            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
      RHSShift = NewRHSShift;
  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
  if (RHSShift)
    if (SDValue NewLHSShift =
            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
      LHSShift = NewLHSShift;

  // If a side is still missing, nothing else we can do.
  if (!RHSShift || !LHSShift)
    return SDValue();

  // At this point we've matched or extracted a shift op on each side.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return SDValue(); // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return SDValue(); // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  // The constant shift amounts must sum to exactly the element size.
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      if (LHSMask.getNode()) {
        // Bits contributed by the SRL half (srl allones, RHSShiftAmt) were
        // never covered by LHSMask, so keep them unconditionally.
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        // Likewise, bits contributed by the SHL half were never covered by
        // RHSMask.
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot;
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return SDValue();

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try matching the variable-amount rotate in both directions.
  SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return SDValue();
}
6390 
6391 namespace {
6392 
6393 /// Represents known origin of an individual byte in load combine pattern. The
6394 /// value of the byte is either constant zero or comes from memory.
6395 struct ByteProvider {
6396   // For constant zero providers Load is set to nullptr. For memory providers
6397   // Load represents the node which loads the byte from memory.
6398   // ByteOffset is the offset of the byte in the value produced by the load.
6399   LoadSDNode *Load = nullptr;
6400   unsigned ByteOffset = 0;
6401 
6402   ByteProvider() = default;
6403 
6404   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6405     return ByteProvider(Load, ByteOffset);
6406   }
6407 
6408   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6409 
6410   bool isConstantZero() const { return !Load; }
6411   bool isMemory() const { return Load; }
6412 
6413   bool operator==(const ByteProvider &Other) const {
6414     return Other.Load == Load && Other.ByteOffset == ByteOffset;
6415   }
6416 
6417 private:
6418   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6419       : Load(Load), ByteOffset(ByteOffset) {}
6420 };
6421 
6422 } // end anonymous namespace
6423 
6424 /// Recursively traverses the expression calculating the origin of the requested
6425 /// byte of the given value. Returns None if the provider can't be calculated.
6426 ///
6427 /// For all the values except the root of the expression verifies that the value
6428 /// has exactly one use and if it's not true return None. This way if the origin
6429 /// of the byte is returned it's guaranteed that the values which contribute to
6430 /// the byte are not used outside of this expression.
6431 ///
6432 /// Because the parts of the expression are not allowed to have more than one
6433 /// use this function iterates over trees, not DAGs. So it never visits the same
6434 /// node more than once.
static const Optional<ByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
                      bool Root = false) {
  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
  if (Depth == 10)
    return None;

  // Only the root may have extra uses; interior nodes must be single-use so
  // the whole tree can later be replaced by one load.
  if (!Root && !Op.hasOneUse())
    return None;

  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
  unsigned BitWidth = Op.getValueSizeInBits();
  if (BitWidth % 8 != 0)
    return None;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");
  (void) ByteWidth;

  switch (Op.getOpcode()) {
  case ISD::OR: {
    // For an OR the byte must be known on both sides, and exactly one side
    // may be non-zero.
    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    if (!LHS)
      return None;
    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    if (!RHS)
      return None;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return None;
  }
  case ISD::SHL: {
    // Only whole-byte shifts by a constant can be analyzed. Bytes below the
    // shift amount are zero; the rest come from the shifted operand.
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return None;

    uint64_t BitShift = ShiftOp->getZExtValue();
    if (BitShift % 8 != 0)
      return None;
    uint64_t ByteShift = BitShift / 8;

    return Index < ByteShift
               ? ByteProvider::getConstantZero()
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes beyond the narrow value are zero only for a zero-extend; for
    // sign/any-extend their contents are unknown.
    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return calculateByteProvider(NarrowOp, Index, Depth + 1);
  }
  case ISD::BSWAP:
    // Byte swap mirrors the byte index.
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1);
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    // Volatile/atomic or pre/post-indexed loads cannot be combined.
    if (!L->isSimple() || L->isIndexed())
      return None;

    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes past the loaded width are only known (zero) for a zextload.
    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return ByteProvider::getMemory(L, Index);
  }
  }

  return None;
}
6521 
// In a little-endian BW-byte value, byte i is stored at memory offset i.
static unsigned LittleEndianByteAt(unsigned BW, unsigned i) { return i; }
6525 
// In a big-endian BW-byte value, byte i is stored at memory offset BW-1-i.
static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
  return BW - 1 - i;
}
6529 
6530 // Check if the bytes offsets we are looking at match with either big or
6531 // little endian value loaded. Return true for big endian, false for little
6532 // endian, and None if match failed.
6533 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
6534                                   int64_t FirstOffset) {
6535   // The endian can be decided only when it is 2 bytes at least.
6536   unsigned Width = ByteOffsets.size();
6537   if (Width < 2)
6538     return None;
6539 
6540   bool BigEndian = true, LittleEndian = true;
6541   for (unsigned i = 0; i < Width; i++) {
6542     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6543     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6544     BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
6545     if (!BigEndian && !LittleEndian)
6546       return None;
6547   }
6548 
6549   assert((BigEndian != LittleEndian) && "It should be either big endian or"
6550                                         "little endian");
6551   return BigEndian;
6552 }
6553 
6554 static SDValue stripTruncAndExt(SDValue Value) {
6555   switch (Value.getOpcode()) {
6556   case ISD::TRUNCATE:
6557   case ISD::ZERO_EXTEND:
6558   case ISD::SIGN_EXTEND:
6559   case ISD::ANY_EXTEND:
6560     return stripTruncAndExt(Value.getOperand(0));
6561   }
6562   return Value;
6563 }
6564 
6565 /// Match a pattern where a wide type scalar value is stored by several narrow
6566 /// stores. Fold it into a single store or a BSWAP and a store if the targets
6567 /// supports it.
6568 ///
6569 /// Assuming little endian target:
6570 ///  i8 *p = ...
6571 ///  i32 val = ...
6572 ///  p[0] = (val >> 0) & 0xFF;
6573 ///  p[1] = (val >> 8) & 0xFF;
6574 ///  p[2] = (val >> 16) & 0xFF;
6575 ///  p[3] = (val >> 24) & 0xFF;
6576 /// =>
6577 ///  *((i32)p) = val;
6578 ///
6579 ///  i8 *p = ...
6580 ///  i32 val = ...
6581 ///  p[0] = (val >> 24) & 0xFF;
6582 ///  p[1] = (val >> 16) & 0xFF;
6583 ///  p[2] = (val >> 8) & 0xFF;
6584 ///  p[3] = (val >> 0) & 0xFF;
6585 /// =>
6586 ///  *((i32)p) = BSWAP(val);
SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
  // Collect all the stores in the chain, walking up through the chain operand
  // as long as it is another simple, unindexed i8 store.
  SDValue Chain;
  SmallVector<StoreSDNode *, 8> Stores;
  for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
    // TODO: Allow unordered atomics when wider type is legal (see D66309)
    if (Store->getMemoryVT() != MVT::i8 ||
        !Store->isSimple() || Store->isIndexed())
      return SDValue();
    Stores.push_back(Store);
    Chain = Store->getChain();
  }
  // Handle the simple type only. The combined type is one byte per collected
  // store.
  unsigned Width = Stores.size();
  EVT VT = EVT::getIntegerVT(
    *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();

  if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
    return SDValue();

  // Check if all the bytes of the combined value we are looking at are stored
  // to the same base address. Collect bytes offsets from Base address into
  // ByteOffsets.
  SDValue CombinedValue;
  SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX);
  int64_t FirstOffset = INT64_MAX;
  StoreSDNode *FirstStore = nullptr;
  Optional<BaseIndexOffset> Base;
  for (auto Store : Stores) {
    // All the stores store different byte of the CombinedValue. A truncate is
    // required to get that byte value.
    SDValue Trunc = Store->getValue();
    if (Trunc.getOpcode() != ISD::TRUNCATE)
      return SDValue();
    // A shift operation is required to get the right byte offset, except the
    // first byte.
    int64_t Offset = 0;
    SDValue Value = Trunc.getOperand(0);
    if (Value.getOpcode() == ISD::SRL ||
        Value.getOpcode() == ISD::SRA) {
      ConstantSDNode *ShiftOffset =
        dyn_cast<ConstantSDNode>(Value.getOperand(1));
      // Trying to match the following pattern. The shift offset must be
      // a constant and a multiple of 8. It is the byte offset in "y".
      //
      // x = srl y, offset
      // i8 z = trunc x
      // store z, ...
      if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
        return SDValue();

     Offset = ShiftOffset->getSExtValue()/8;
     Value = Value.getOperand(0);
    }

    // Stores must share the same combined value with different offsets.
    if (!CombinedValue)
      CombinedValue = Value;
    else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
      return SDValue();

    // The trunc and all the extend operation should be stripped to get the
    // real value we are stored.
    else if (CombinedValue.getValueType() != VT) {
      if (Value.getValueType() == VT ||
          Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
        CombinedValue = Value;
      // Give up if the combined value type is smaller than the store size.
      if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
        return SDValue();
    }

    // Stores must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Remember the first byte store, i.e. the one at the lowest address; it
    // becomes the base of the wide store.
    if (ByteOffsetFromBase < FirstOffset) {
      FirstStore = Store;
      FirstOffset = ByteOffsetFromBase;
    }
    // Map the offset in the store and the offset in the combined value, and
    // early return if it has been set before (duplicate byte positions can't
    // form a contiguous wide store).
    if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
      return SDValue();
    ByteOffsets[Offset] = ByteOffsetFromBase;
  }

  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
  assert(FirstStore && "First store must be set");

  // Check if the bytes of the combined value we are looking at match with
  // either big or little endian value store.
  Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
  if (!IsBigEndian.hasValue())
    return SDValue();

  // The node we are looking at matches with the pattern, check if we can
  // replace it with a single bswap if needed and store.

  // If the store needs byte swap check if the target supports it
  bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // store and byte shuffling instead of several stores and byte shuffling.
  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Check that a store of the wide type is both allowed and fast on the target
  bool Fast = false;
  bool Allowed =
      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                             *FirstStore->getMemOperand(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  // The combined value may be wider than VT if extends were stripped; narrow
  // it down to the store type.
  if (VT != CombinedValue.getValueType()) {
    assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
           "Get unexpected store value to combine");
    CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
                             CombinedValue);
  }

  if (NeedsBswap)
    CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);

  SDValue NewStore =
    DAG.getStore(Chain, SDLoc(N),  CombinedValue, FirstStore->getBasePtr(),
                 FirstStore->getPointerInfo(), FirstStore->getAlignment());

  // Rely on other DAG combine rules to remove the other individual stores.
  DAG.ReplaceAllUsesWith(N, NewStore.getNode());
  return NewStore;
}
6728 
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
///
/// Assuming little endian target:
///  i8 *a = ...
///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
/// =>
///  i32 val = *((i32)a)
///
///  i8 *a = ...
///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
/// =>
///  i32 val = BSWAP(*((i32)a))
///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads) its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node. For example, once the address
/// of t28 load is reassociated load combine won't be triggered:
///             t25: i32 = add t4, Constant:i32<2>
///           t26: i64 = sign_extend t25
///        t27: i64 = add t2, t26
///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
///     t29: i32 = zero_extend t28
///   t32: i32 = shl t29, Constant:i8<8>
/// t33: i32 = or t23, t32
/// As a possible fix visitLoad can check if the load can be a part of a load
/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
  assert(N->getOpcode() == ISD::OR &&
         "Can only match load combining against OR nodes");

  // Handles simple types only
  EVT VT = N->getValueType(0);
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();
  unsigned ByteWidth = VT.getSizeInBits() / 8;

  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
  // Translate a byte provider's index within its load's value into the byte's
  // offset within the memory the load reads, based on target endianness.
  auto MemoryByteOffset = [&] (ByteProvider P) {
    assert(P.isMemory() && "Must be a memory byte provider");
    unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes not bit");
    unsigned LoadByteWidth = LoadBitWidth / 8;
    return IsBigEndianTarget
            ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
            : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
  };

  // Base address all the loads must share; unset until the first memory byte
  // provider is seen.
  Optional<BaseIndexOffset> Base;
  SDValue Chain;

  SmallPtrSet<LoadSDNode *, 8> Loads;
  Optional<ByteProvider> FirstByteProvider;
  int64_t FirstOffset = INT64_MAX;

  // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect bytes offsets from Base address in ByteOffsets.
  SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
  unsigned ZeroExtendedBytes = 0;
  // Walk from the most significant byte down so that constant-zero bytes are
  // only accepted as a contiguous run at the top of the value.
  for (int i = ByteWidth - 1; i >= 0; --i) {
    auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
    if (!P)
      return SDValue();

    if (P->isConstantZero()) {
      // It's OK for the N most significant bytes to be 0, we can just
      // zero-extend the load.
      if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
        return SDValue();
      continue;
    }
    assert(P->isMemory() && "provenance should either be memory or zero");

    LoadSDNode *L = P->Load;
    assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
           !L->isIndexed() &&
           "Must be enforced by calculateByteProvider");
    assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");

    // All loads must share the same chain
    SDValue LChain = L->getChain();
    if (!Chain)
      Chain = LChain;
    else if (Chain != LChain)
      return SDValue();

    // Loads must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Calculate the offset of the current byte from the base address
    ByteOffsetFromBase += MemoryByteOffset(*P);
    ByteOffsets[i] = ByteOffsetFromBase;

    // Remember the first byte load
    if (ByteOffsetFromBase < FirstOffset) {
      FirstByteProvider = P;
      FirstOffset = ByteOffsetFromBase;
    }

    Loads.insert(L);
  }
  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
         "memory, so there must be at least one load which produces the value");
  assert(Base && "Base address of the accessed memory location must be set");
  assert(FirstOffset != INT64_MAX && "First byte offset must be set");

  bool NeedsZext = ZeroExtendedBytes > 0;

  // The memory type covers only the bytes actually loaded; the top
  // ZeroExtendedBytes are supplied by zero extension.
  EVT MemVT =
      EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);

  if (!MemVT.isSimple())
    return SDValue();

  // Before legalize we can introduce too wide illegal loads which will be later
  // split into legal sized loads. This enables us to combine i64 load by i8
  // patterns to a couple of i32 loads on 32 bit targets.
  if (LegalOperations &&
      !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
                            MemVT))
    return SDValue();

  // Check if the bytes of the OR we are looking at match with either big or
  // little endian value load. Zero-extended (top) bytes are excluded from the
  // endianness check.
  Optional<bool> IsBigEndian = isBigEndian(
      makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
  if (!IsBigEndian.hasValue())
    return SDValue();

  assert(FirstByteProvider && "must be set");

  // Ensure that the first byte is loaded from zero offset of the first load.
  // So the combined value can be loaded from the first load address.
  if (MemoryByteOffset(*FirstByteProvider) != 0)
    return SDValue();
  LoadSDNode *FirstLoad = FirstByteProvider->Load;

  // The node we are looking at matches with the pattern, check if we can
  // replace it with a single (possibly zero-extended) load and bswap + shift if
  // needed.

  // If the load needs byte swap check if the target supports it
  bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // load and byte shuffling instead of several loads and byte shuffling.
  // We do not introduce illegal bswaps when zero-extending as this tends to
  // introduce too many arithmetic instructions.
  if (NeedsBswap && (LegalOperations || NeedsZext) &&
      !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // If we need to bswap and zero extend, we have to insert a shift. Check that
  // it is legal.
  if (NeedsBswap && NeedsZext && LegalOperations &&
      !TLI.isOperationLegal(ISD::SHL, VT))
    return SDValue();

  // Check that a load of the wide type is both allowed and fast on the target
  bool Fast = false;
  bool Allowed =
      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
                             *FirstLoad->getMemOperand(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
                                   SDLoc(N), VT, Chain, FirstLoad->getBasePtr(),
                                   FirstLoad->getPointerInfo(), MemVT,
                                   FirstLoad->getAlignment());

  // Transfer chain users from old loads to the new load.
  for (LoadSDNode *L : Loads)
    DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));

  if (!NeedsBswap)
    return NewLoad;

  // When zero-extending, first shift the loaded bytes into the high part so
  // that the BSWAP puts them back in the low part with the zero bytes on top.
  SDValue ShiftedLoad =
      NeedsZext
          ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
                        DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
                                                   SDLoc(N), LegalOperations))
          : NewLoad;
  return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
}
6925 
6926 // If the target has andn, bsl, or a similar bit-select instruction,
6927 // we want to unfold masked merge, with canonical pattern of:
6928 //   |        A  |  |B|
6929 //   ((x ^ y) & m) ^ y
6930 //    |  D  |
6931 // Into:
6932 //   (x & m) | (y & ~m)
6933 // If y is a constant, and the 'andn' does not work with immediates,
6934 // we unfold into a different pattern:
6935 //   ~(~x & m) & (m | y)
6936 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
6937 //       the very least that breaks andnpd / andnps patterns, and because those
6938 //       patterns are simplified in IR and shouldn't be created in the DAG
6939 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
6940   assert(N->getOpcode() == ISD::XOR);
6941 
6942   // Don't touch 'not' (i.e. where y = -1).
6943   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
6944     return SDValue();
6945 
6946   EVT VT = N->getValueType(0);
6947 
6948   // There are 3 commutable operators in the pattern,
6949   // so we have to deal with 8 possible variants of the basic pattern.
6950   SDValue X, Y, M;
6951   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
6952     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
6953       return false;
6954     SDValue Xor = And.getOperand(XorIdx);
6955     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
6956       return false;
6957     SDValue Xor0 = Xor.getOperand(0);
6958     SDValue Xor1 = Xor.getOperand(1);
6959     // Don't touch 'not' (i.e. where y = -1).
6960     if (isAllOnesOrAllOnesSplat(Xor1))
6961       return false;
6962     if (Other == Xor0)
6963       std::swap(Xor0, Xor1);
6964     if (Other != Xor1)
6965       return false;
6966     X = Xor0;
6967     Y = Xor1;
6968     M = And.getOperand(XorIdx ? 0 : 1);
6969     return true;
6970   };
6971 
6972   SDValue N0 = N->getOperand(0);
6973   SDValue N1 = N->getOperand(1);
6974   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
6975       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
6976     return SDValue();
6977 
6978   // Don't do anything if the mask is constant. This should not be reachable.
6979   // InstCombine should have already unfolded this pattern, and DAGCombiner
6980   // probably shouldn't produce it, too.
6981   if (isa<ConstantSDNode>(M.getNode()))
6982     return SDValue();
6983 
6984   // We can transform if the target has AndNot
6985   if (!TLI.hasAndNot(M))
6986     return SDValue();
6987 
6988   SDLoc DL(N);
6989 
6990   // If Y is a constant, check that 'andn' works with immediates.
6991   if (!TLI.hasAndNot(Y)) {
6992     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
6993     // If not, we need to do a bit more work to make sure andn is still used.
6994     SDValue NotX = DAG.getNOT(DL, X, VT);
6995     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
6996     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
6997     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
6998     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
6999   }
7000 
7001   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7002   SDValue NotM = DAG.getNOT(DL, M, VT);
7003   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7004 
7005   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7006 }
7007 
/// Combine an ISD::XOR node: constant folding, canonicalization, 'not'
/// simplifications, setcc inversion, masked-merge unfolding, and various
/// target-aware folds. The folds are ordered; earlier, cheaper folds run
/// first.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  SDLoc DL(N);
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // fold (xor c1, c2) -> c1^c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  unsigned N0Opcode = N0.getOpcode();
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) &&
      isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               LHS.getValueType());
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0Opcode) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      case ISD::STRICT_FSETCC:
      case ISD::STRICT_FSETCCS: {
        if (N0.hasOneUse()) {
          // FIXME Can we handle multiple uses? Could we token factor the chain
          // results from the new/old setcc?
          // The strict setcc carries a chain; the replacement must transfer
          // the chain result as well, hence the explicit RAUW on value 1.
          SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
                                       N0.getOperand(0),
                                       N0Opcode == ISD::STRICT_FSETCCS);
          CombineTo(N, SetCC);
          DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
          recursivelyDeleteUnusedNodes(N0.getNode());
          return SDValue(N, 0); // Return N so it doesn't get rechecked!
        }
        break;
      }
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL0(N0);
    V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
                    DAG.getConstant(1, DL0, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
    if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
      // De Morgan: swap AND<->OR and complement both operands.
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
      N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
      AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
      return DAG.getNode(NewOpcode, DL, VT, N00, N01);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
    if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
      N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
      AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
      return DAG.getNode(NewOpcode, DL, VT, N00, N01);
    }
  }

  // fold (not (neg x)) -> (add X, -1)
  // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
  // Y is a constant or the subtract has a single use.
  if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
      isNullConstant(N0.getOperand(0))) {
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
                       DAG.getAllOnesConstant(DL, VT));
  }

  // fold (not (add X, -1)) -> (neg X)
  if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
      isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                       N0.getOperand(0));
  }

  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
    SDValue X = N0.getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
  }

  if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
    ConstantSDNode *XorC = isConstOrConstSplat(N1);
    ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
    unsigned BitWidth = VT.getScalarSizeInBits();
    if (XorC && ShiftC) {
      // Don't crash on an oversized shift. We can not guarantee that a bogus
      // shift has been simplified to undef.
      uint64_t ShiftAmt = ShiftC->getLimitedValue();
      if (ShiftAmt < BitWidth) {
        APInt Ones = APInt::getAllOnesValue(BitWidth);
        Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
        if (XorC->getAPIntValue() == Ones) {
          // If the xor constant is a shifted -1, do a 'not' before the shift:
          // xor (X << ShiftC), XorC --> (not X) << ShiftC
          // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
          SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
          return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
        }
      }
    }
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
    SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
    if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
      SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
      SDValue S0 = S.getOperand(0);
      if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        // The sra amount must be exactly bitwidth-1 (a sign-bit broadcast).
        if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, DL, VT, S0);
      }
    }
  }

  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0Opcode == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
  if (SDValue MM = unfoldMaskedMerge(N))
    return MM;

  // Simplify the expression using non-local knowledge.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
    return Combined;

  return SDValue();
}
7236 
7237 /// If we have a shift-by-constant of a bitwise logic op that itself has a
7238 /// shift-by-constant operand with identical opcode, we may be able to convert
7239 /// that into 2 independent shifts followed by the logic op. This is a
7240 /// throughput improvement.
7241 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7242   // Match a one-use bitwise logic op.
7243   SDValue LogicOp = Shift->getOperand(0);
7244   if (!LogicOp.hasOneUse())
7245     return SDValue();
7246 
7247   unsigned LogicOpcode = LogicOp.getOpcode();
7248   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
7249       LogicOpcode != ISD::XOR)
7250     return SDValue();
7251 
7252   // Find a matching one-use shift by constant.
7253   unsigned ShiftOpcode = Shift->getOpcode();
7254   SDValue C1 = Shift->getOperand(1);
7255   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
7256   assert(C1Node && "Expected a shift with constant operand");
7257   const APInt &C1Val = C1Node->getAPIntValue();
7258   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
7259                              const APInt *&ShiftAmtVal) {
7260     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
7261       return false;
7262 
7263     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
7264     if (!ShiftCNode)
7265       return false;
7266 
7267     // Capture the shifted operand and shift amount value.
7268     ShiftOp = V.getOperand(0);
7269     ShiftAmtVal = &ShiftCNode->getAPIntValue();
7270 
7271     // Shift amount types do not have to match their operand type, so check that
7272     // the constants are the same width.
7273     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
7274       return false;
7275 
7276     // The fold is not valid if the sum of the shift values exceeds bitwidth.
7277     if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
7278       return false;
7279 
7280     return true;
7281   };
7282 
7283   // Logic ops are commutative, so check each operand for a match.
7284   SDValue X, Y;
7285   const APInt *C0Val;
7286   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
7287     Y = LogicOp.getOperand(1);
7288   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
7289     Y = LogicOp.getOperand(0);
7290   else
7291     return SDValue();
7292 
7293   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
7294   SDLoc DL(Shift);
7295   EVT VT = Shift->getValueType(0);
7296   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
7297   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
7298   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
7299   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
7300   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
7301 }
7302 
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// We are looking for: (shift being one of shl/sra/srl)
///   shift (binop X, C0), C1
/// And want to transform into:
///   binop (shift X, C1), (shift C0, C1)
SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
  assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");

  // Do not turn a 'not' into a regular xor.
  if (isBitwiseNot(N->getOperand(0)))
    return SDValue();

  // The inner binop must be one-use, since we want to replace it.
  SDValue LHS = N->getOperand(0);
  if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
    return SDValue();

  // TODO: This is limited to early combining because it may reveal regressions
  //       otherwise. But since we just checked a target hook to see if this is
  //       desirable, that should have filtered out cases where this interferes
  //       with some other pattern matching.
  if (!LegalTypes)
    if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
      return R;

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  switch (LHS.getOpcode()) {
  default:
    return SDValue();
  case ISD::OR:
  case ISD::XOR:
  case ISD::AND:
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
  if (!BinOpCst)
    return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is copy/select. Enable this in other cases when figure out it's exactly
  // profitable.
  SDValue BinOpLHSVal = LHS.getOperand(0);
  bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
                            BinOpLHSVal.getOpcode() == ISD::SRA ||
                            BinOpLHSVal.getOpcode() == ISD::SRL) &&
                           isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
  bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
                        BinOpLHSVal.getOpcode() == ISD::SELECT;

  if (!IsShiftByConstant && !IsCopyOrSelect)
    return SDValue();

  // NOTE(review): this bails out when the shift has exactly one use in the
  // copy/select case; presumably the transform is only considered profitable
  // there when the shift result is reused — confirm the intended condition.
  if (IsCopyOrSelect && N->hasOneUse())
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  // Both operands are constants here, so the fold must succeed (see assert).
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
                               N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
                                 N->getOperand(1));
  return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
}
7379 
7380 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
7381   assert(N->getOpcode() == ISD::TRUNCATE);
7382   assert(N->getOperand(0).getOpcode() == ISD::AND);
7383 
7384   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
7385   EVT TruncVT = N->getValueType(0);
7386   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
7387       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
7388     SDValue N01 = N->getOperand(0).getOperand(1);
7389     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
7390       SDLoc DL(N);
7391       SDValue N00 = N->getOperand(0).getOperand(0);
7392       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
7393       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
7394       AddToWorklist(Trunc00.getNode());
7395       AddToWorklist(Trunc01.getNode());
7396       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
7397     }
7398   }
7399 
7400   return SDValue();
7401 }
7402 
/// Combine an ISD::ROTL / ISD::ROTR node: fold away trivial rotations,
/// normalize oversized amounts, and merge chained rotates.
SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullOrNullSplat(N1))
    return N0;

  // fold (rot x, c) -> x iff (c % BitSize) == 0
  // Only valid for power-of-2 bitsizes, where (c % BitSize) == 0 is exactly
  // "the low log2(BitSize) bits of c are zero".
  if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
    APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
    if (DAG.MaskedValueIsZero(N1, ModuloMask))
      return N0;
  }

  // fold (rot x, c) -> (rot x, c % BitSize)
  // TODO - support non-uniform vector amounts.
  if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
    if (Cst->getAPIntValue().uge(Bitsize)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
      return DAG.getNode(N->getOpcode(), dl, VT, N0,
                         DAG.getConstant(RotAmt, dl, N1.getValueType()));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  // Same-direction rotates add their amounts; opposite directions subtract.
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
      EVT ShiftVT = C1->getValueType(0);
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
              CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
        // NOTE(review): if this SREM fold ever failed, a null operand would be
        // passed to getNode below — presumed unreachable since both operands
        // are constants; confirm.
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
            ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                           CombinedShiftNorm);
      }
    }
  }
  return SDValue();
}
7463 
/// Combine patterns rooted at an ISD::SHL node.  Returns the replacement
/// value if a fold applied, or an empty SDValue otherwise.  N0 is the value
/// being shifted, N1 is the shift amount.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  EVT ShiftVT = N1.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        // Only valid when the setcc result is 0/-1; shifting the mask instead
        // of the setcc output then preserves every result bit.
        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C =
                  DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  // Uniform (splat) constant shift amount, or null if not uniform-constant.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
    return C;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  // TODO - support non-uniform vector shift amounts.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    // The extra bit passed to zeroExtendToMatch guards the sum against
    // overflowing the APInt width.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::ANY_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    SDValue InnerShiftAmt = N0Op0.getOperand(1);
    EVT InnerVT = N0Op0.getValueType();
    uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();

    // c2 >= (outer width - inner width) ensures all ext-produced bits are
    // shifted out, per the comment above.
    auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
                                                         ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return c2.uge(OpSizeInBits - InnerBitwidth) &&
             (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
                                                      ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return c2.uge(OpSizeInBits - InnerBitwidth) &&
             (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDLoc DL(N);
      SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
      // The inner amount may have a narrower type; normalize it before adding.
      SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
      Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
      return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    SDValue InnerShiftAmt = N0Op0.getOperand(1);

    auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2);
      return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDLoc DL(N);
      EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
      SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
      NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
      AddToWorklist(NewSHL.getNode());
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
  // TODO - support non-uniform vector shift amounts.
  // The 'exact' flag guarantees no bits were shifted out by the inner shift,
  // so the pair can be merged into a single shift either direction.
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, ShiftVT));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, ShiftVT));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  // TODO - drop hasOneUse requirement if c1 == c2?
  // TODO - support non-uniform vector shift amounts.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
      TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
        uint64_t c1 = N0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        // Mask of the bits surviving the srl, repositioned to where the
        // replacement single shift leaves them.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, ShiftVT));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, ShiftVT));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
      TLI.isDesirableToCommuteWithShift(N, Level)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    // Only perform the fold if (c1 << c2) actually constant-folded.
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N))
      return NewSHL;

  return SDValue();
}
7711 
/// Combine patterns rooted at an ISD::SRA node.  Returns the replacement
/// value if a fold applied, or an empty SDValue otherwise.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // Uniform (splat) constant shift amount, or null if not uniform-constant.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> c1>>c2 (arithmetic constant fold)
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
    return C;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if (!LegalOperations ||
        TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
        TargetLowering::Legal)
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  // clamp (add c1, c2) to max shift.
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();
    EVT ShiftSVT = ShiftVT.getScalarType();
    SmallVector<SDValue, 16> ShiftValues;

    // Clamping to OpSizeInBits-1 is sound for sra: shifting further just
    // replicates the sign bit.
    auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      APInt Sum = c1 + c2;
      unsigned ShiftSum =
          Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
      ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
      return true;
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
      SDValue ShiftValue;
      if (VT.isVector())
        ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
      else
        ShiftValue = ShiftValues[0];
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncate's target type is legal, sign_extend is
      // legal on that type, and the truncate to that type is both legal and
      // free, perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        // SRL is fine here: the bits it shifts in are discarded by the
        // following truncate.
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
  //   sra (add (shl X, N1C), AddC), N1C -->
  //   sext (add (trunc X to (width - N1C)), AddC')
  if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
      N0.getOperand(0).getOpcode() == ISD::SHL &&
      N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
    if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
      SDValue Shl = N0.getOperand(0);
      // Determine what the truncate's type would be and ask the target if that
      // is a free operation.
      LLVMContext &Ctx = *DAG.getContext();
      unsigned ShiftAmt = N1C->getZExtValue();
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // TODO: The simple type check probably belongs in the default hook
      //       implementation and/or target-specific overrides (because
      //       non-simple types likely require masking when legalized), but that
      //       restriction may conflict with other transforms.
      if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
        // AddC' = AddC shifted down to the truncated width.
        SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
                             trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
        SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
        return DAG.getSExtOrTrunc(Add, DL, VT);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  // TODO - support non-uniform vector shift amounts.
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      EVT LargeVT = N0Op0.getValueType();
      unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
      if (LargeShift->getAPIntValue() == TruncBits) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
                                      getShiftAmountTy(LargeVT));
        SDValue SRA =
            DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  // TODO - support non-uniform vector shift amounts.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N))
      return NewSRA;

  return SDValue();
}
7901 
/// Combine patterns rooted at an ISD::SRL node.  Returns the replacement
/// value if a fold applied, or an empty SDValue otherwise.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // Uniform (splat) constant shift amount, or null if not uniform-constant.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
    return C;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    // The extra bit passed to zeroExtendToMatch guards the sum against
    // overflowing the APInt width.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue InnerShift = N0.getOperand(0);
    // TODO - support non-uniform vector shift amounts.
    if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = InnerShift.getValueType();
      EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N);
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
        SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       InnerShift.getOperand(0), NewShiftAmt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
      }
      // In the more general case, we can clear the high bits after the shift:
      // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
      if (N0.hasOneUse() && InnerShift.hasOneUse() &&
          c1 + c2 < InnerShiftSize) {
        SDLoc DL(N);
        SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
        SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       InnerShift.getOperand(0), NewShiftAmt);
        SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
                                                            OpSizeInBits - c2),
                                       DL, InnerShiftVT);
        SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  // TODO - (srl (shl x, c1), c2).
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    // cst2 = all-ones >>u c, i.e. the low (width - c) bits set.
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  // TODO - support non-uniform vector shift amounts.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getAPIntValue().uge(BitSize))
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, DL0,
                                          getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Mask off the (undef) bits the anyext may have introduced above the
      // shifted result.
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  // TODO - support non-uniform vector shift amounts.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}
8127 
/// Combine patterns rooted at an ISD::FSHL or ISD::FSHR node.  Operands are
/// (N0, N1, N2): the high input, the low input, and the shift amount.
/// Returns the replacement value if a fold applied, or an empty SDValue
/// otherwise.
SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  bool IsFSHL = N->getOpcode() == ISD::FSHL;
  unsigned BitWidth = VT.getScalarSizeInBits();

  // fold (fshl N0, N1, 0) -> N0
  // fold (fshr N0, N1, 0) -> N1
  // Power-of-2 width lets us test "amount % BitWidth == 0" as a mask check.
  if (isPowerOf2_32(BitWidth))
    if (DAG.MaskedValueIsZero(
            N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
      return IsFSHL ? N0 : N1;

  auto IsUndefOrZero = [](SDValue V) {
    return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
  };

  // TODO - support non-uniform vector shift amounts.
  if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
    EVT ShAmtTy = N2.getValueType();

    // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
    if (Cst->getAPIntValue().uge(BitWidth)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
      return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
                         DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
    }

    unsigned ShAmt = Cst->getZExtValue();
    if (ShAmt == 0)
      return IsFSHL ? N0 : N1;

    // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
    // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
    // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
    // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
    if (IsUndefOrZero(N0))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
                         DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
                                         SDLoc(N), ShAmtTy));
    if (IsUndefOrZero(N1))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                         DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
                                         SDLoc(N), ShAmtTy));
  }

  // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
  // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff We know the shift amount is in range.
  // TODO: when is it worth doing SUB(BW, N2) as well?
  if (isPowerOf2_32(BitWidth)) {
    APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
    if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
    if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
  }

  // fold (fshl N0, N0, N2) -> (rotl N0, N2)
  // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
  // is legal as well we might be better off avoiding non-constant (BW - N2).
  unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
  if (N0 == N1 && hasOperation(RotOpc, VT))
    return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);

  // Simplify, based on bits shifted out of N0/N1.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
8202 
8203 SDValue DAGCombiner::visitABS(SDNode *N) {
8204   SDValue N0 = N->getOperand(0);
8205   EVT VT = N->getValueType(0);
8206 
8207   // fold (abs c1) -> c2
8208   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8209     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
8210   // fold (abs (abs x)) -> (abs x)
8211   if (N0.getOpcode() == ISD::ABS)
8212     return N0;
8213   // fold (abs x) -> x iff not-negative
8214   if (DAG.SignBitIsZero(N0))
8215     return N0;
8216   return SDValue();
8217 }
8218 
8219 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
8220   SDValue N0 = N->getOperand(0);
8221   EVT VT = N->getValueType(0);
8222 
8223   // fold (bswap c1) -> c2
8224   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8225     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
8226   // fold (bswap (bswap x)) -> x
8227   if (N0.getOpcode() == ISD::BSWAP)
8228     return N0->getOperand(0);
8229   return SDValue();
8230 }
8231 
8232 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
8233   SDValue N0 = N->getOperand(0);
8234   EVT VT = N->getValueType(0);
8235 
8236   // fold (bitreverse c1) -> c2
8237   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8238     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
8239   // fold (bitreverse (bitreverse x)) -> x
8240   if (N0.getOpcode() == ISD::BITREVERSE)
8241     return N0.getOperand(0);
8242   return SDValue();
8243 }
8244 
8245 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
8246   SDValue N0 = N->getOperand(0);
8247   EVT VT = N->getValueType(0);
8248 
8249   // fold (ctlz c1) -> c2
8250   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8251     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
8252 
8253   // If the value is known never to be zero, switch to the undef version.
8254   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
8255     if (DAG.isKnownNeverZero(N0))
8256       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8257   }
8258 
8259   return SDValue();
8260 }
8261 
8262 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
8263   SDValue N0 = N->getOperand(0);
8264   EVT VT = N->getValueType(0);
8265 
8266   // fold (ctlz_zero_undef c1) -> c2
8267   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8268     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8269   return SDValue();
8270 }
8271 
8272 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
8273   SDValue N0 = N->getOperand(0);
8274   EVT VT = N->getValueType(0);
8275 
8276   // fold (cttz c1) -> c2
8277   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8278     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
8279 
8280   // If the value is known never to be zero, switch to the undef version.
8281   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
8282     if (DAG.isKnownNeverZero(N0))
8283       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8284   }
8285 
8286   return SDValue();
8287 }
8288 
8289 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
8290   SDValue N0 = N->getOperand(0);
8291   EVT VT = N->getValueType(0);
8292 
8293   // fold (cttz_zero_undef c1) -> c2
8294   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8295     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8296   return SDValue();
8297 }
8298 
8299 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
8300   SDValue N0 = N->getOperand(0);
8301   EVT VT = N->getValueType(0);
8302 
8303   // fold (ctpop c1) -> c2
8304   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8305     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
8306   return SDValue();
8307 }
8308 
8309 // FIXME: This should be checking for no signed zeros on individual operands, as
8310 // well as no nans.
8311 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
8312                                          SDValue RHS,
8313                                          const TargetLowering &TLI) {
8314   const TargetOptions &Options = DAG.getTarget().Options;
8315   EVT VT = LHS.getValueType();
8316 
8317   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
8318          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
8319          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
8320 }
8321 
8322 /// Generate Min/Max node
8323 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
8324                                    SDValue RHS, SDValue True, SDValue False,
8325                                    ISD::CondCode CC, const TargetLowering &TLI,
8326                                    SelectionDAG &DAG) {
8327   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
8328     return SDValue();
8329 
8330   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
8331   switch (CC) {
8332   case ISD::SETOLT:
8333   case ISD::SETOLE:
8334   case ISD::SETLT:
8335   case ISD::SETLE:
8336   case ISD::SETULT:
8337   case ISD::SETULE: {
8338     // Since it's known never nan to get here already, either fminnum or
8339     // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
8340     // expanded in terms of it.
8341     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8342     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8343       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8344 
8345     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
8346     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8347       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8348     return SDValue();
8349   }
8350   case ISD::SETOGT:
8351   case ISD::SETOGE:
8352   case ISD::SETGT:
8353   case ISD::SETGE:
8354   case ISD::SETUGT:
8355   case ISD::SETUGE: {
8356     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8357     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8358       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8359 
8360     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
8361     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8362       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8363     return SDValue();
8364   }
8365   default:
8366     return SDValue();
8367   }
8368 }
8369 
8370 /// If a (v)select has a condition value that is a sign-bit test, try to smear
8371 /// the condition operand sign-bit across the value width and use it as a mask.
8372 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
8373   SDValue Cond = N->getOperand(0);
8374   SDValue C1 = N->getOperand(1);
8375   SDValue C2 = N->getOperand(2);
8376   assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
8377          "Expected select-of-constants");
8378 
8379   EVT VT = N->getValueType(0);
8380   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
8381       VT != Cond.getOperand(0).getValueType())
8382     return SDValue();
8383 
8384   // The inverted-condition + commuted-select variants of these patterns are
8385   // canonicalized to these forms in IR.
8386   SDValue X = Cond.getOperand(0);
8387   SDValue CondC = Cond.getOperand(1);
8388   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
8389   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
8390       isAllOnesOrAllOnesSplat(C2)) {
8391     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
8392     SDLoc DL(N);
8393     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8394     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8395     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
8396   }
8397   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
8398     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
8399     SDLoc DL(N);
8400     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8401     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8402     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
8403   }
8404   return SDValue();
8405 }
8406 
/// Try to fold (select Cond, C1, C2) with integer constant arms into
/// extend/add/shift/xor math on the condition, avoiding a select node.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  // These folds only produce integer math.
  if (!VT.isInteger())
    return SDValue();

  // Both select arms must be scalar integer constants.
  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C1 || !C2)
    return SDValue();

  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext). There
  // is also a target-independent combine here in DAGCombiner in the other
  // direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // Use a target hook because some targets may prefer to transform in the
    // other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      // For any constants that differ by 1, we can transform the select into an
      // extend and add.
      const APInt &C1Val = C1->getAPIntValue();
      const APInt &C2Val = C2->getAPIntValue();
      if (C1Val - 1 == C2Val) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1Val + 1 == C2Val) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }

      // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
      if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
        return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
      }

      // Last resort: smear a sign-bit-test condition into a mask.
      if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
        return V;
    }

    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}
8514 
/// Combine an ISD::SELECT node: boolean-logic folds for i1 selects,
/// select-of-constants math, select-chain normalization, and setcc-based
/// folds (min/max, saturating add, select_cc).
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);
  SDNodeFlags Flags = N->getFlags();

  // Let SelectionDAG's generic select simplification run first.
  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      // Speculatively build the inner select; if the node already existed in
      // the DAG it will have other uses, which also justifies the transform.
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2, Flags);
      // Cleanup on failure.
      if (InnerSelect.use_empty())
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
                                        Cond1, N1, N2, Flags);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect, Flags);
      // Cleanup on failure.
      if (InnerSelect.use_empty())
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
                             N2, Flags);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2, Flags);
        }
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
                             N2_2, Flags);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2, Flags);
      }
    }
  }

  // select (not Cond), N1, N2 -> select Cond, N2, N1
  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
    SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
    SelectOp->setFlags(Flags);
    return SelectOp;
  }

  // Fold selects based on a setcc into other things, such as min/max/abs.
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

    // select (fcmp lt x, y), x, y -> fminnum x, y
    // select (fcmp gt x, y), x, y -> fmaxnum x, y
    //
    // This is OK if we don't care what happens if either operand is a NaN.
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
      if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
                                                CC, TLI, DAG))
        return FMinMax;

    // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
    // This is conservatively limited to pre-legal-operations to give targets
    // a chance to reverse the transform if they want to do that. Also, it is
    // unlikely that the pattern would be formed late, so it's probably not
    // worth going through the other checks.
    if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
        CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
        N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
      auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
      auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
      if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
        // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
        // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
        //
        // The IR equivalent of this transform would have this form:
        //   %a = add %x, C
        //   %c = icmp ugt %x, ~C
        //   %r = select %c, -1, %a
        //   =>
        //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
        //   %u0 = extractvalue %u, 0
        //   %u1 = extractvalue %u, 1
        //   %r = select %u1, -1, %u0
        SDVTList VTs = DAG.getVTList(VT, VT0);
        SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
        return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
      }
    }

    if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
        (!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
      // Any flags available in a select/setcc fold will be on the setcc as they
      // migrated from fcmp
      Flags = N0.getNode()->getFlags();
      SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
                                       N2, N0.getOperand(2));
      SelectNode->setFlags(Flags);
      return SelectNode;
    }

    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}
8701 
8702 // This function assumes all the vselect's arguments are CONCAT_VECTOR
8703 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
8704 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
8705   SDLoc DL(N);
8706   SDValue Cond = N->getOperand(0);
8707   SDValue LHS = N->getOperand(1);
8708   SDValue RHS = N->getOperand(2);
8709   EVT VT = N->getValueType(0);
8710   int NumElems = VT.getVectorNumElements();
8711   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
8712          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
8713          Cond.getOpcode() == ISD::BUILD_VECTOR);
8714 
8715   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
8716   // binary ones here.
8717   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
8718     return SDValue();
8719 
8720   // We're sure we have an even number of elements due to the
8721   // concat_vectors we have as arguments to vselect.
8722   // Skip BV elements until we find one that's not an UNDEF
8723   // After we find an UNDEF element, keep looping until we get to half the
8724   // length of the BV and see if all the non-undef nodes are the same.
8725   ConstantSDNode *BottomHalf = nullptr;
8726   for (int i = 0; i < NumElems / 2; ++i) {
8727     if (Cond->getOperand(i)->isUndef())
8728       continue;
8729 
8730     if (BottomHalf == nullptr)
8731       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8732     else if (Cond->getOperand(i).getNode() != BottomHalf)
8733       return SDValue();
8734   }
8735 
8736   // Do the same for the second half of the BuildVector
8737   ConstantSDNode *TopHalf = nullptr;
8738   for (int i = NumElems / 2; i < NumElems; ++i) {
8739     if (Cond->getOperand(i)->isUndef())
8740       continue;
8741 
8742     if (TopHalf == nullptr)
8743       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8744     else if (Cond->getOperand(i).getNode() != TopHalf)
8745       return SDValue();
8746   }
8747 
8748   assert(TopHalf && BottomHalf &&
8749          "One half of the selector was all UNDEFs and the other was all the "
8750          "same value. This should have been addressed before this function.");
8751   return DAG.getNode(
8752       ISD::CONCAT_VECTORS, DL, VT,
8753       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
8754       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
8755 }
8756 
8757 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
8758   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
8759   SDValue Mask = MSC->getMask();
8760   SDValue Chain = MSC->getChain();
8761   SDLoc DL(N);
8762 
8763   // Zap scatters with a zero mask.
8764   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8765     return Chain;
8766 
8767   return SDValue();
8768 }
8769 
8770 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
8771   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
8772   SDValue Mask = MST->getMask();
8773   SDValue Chain = MST->getChain();
8774   SDLoc DL(N);
8775 
8776   // Zap masked stores with a zero mask.
8777   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8778     return Chain;
8779 
8780   // Try transforming N to an indexed store.
8781   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
8782     return SDValue(N, 0);
8783 
8784   return SDValue();
8785 }
8786 
8787 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
8788   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
8789   SDValue Mask = MGT->getMask();
8790   SDLoc DL(N);
8791 
8792   // Zap gathers with a zero mask.
8793   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8794     return CombineTo(N, MGT->getPassThru(), MGT->getChain());
8795 
8796   return SDValue();
8797 }
8798 
8799 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
8800   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
8801   SDValue Mask = MLD->getMask();
8802   SDLoc DL(N);
8803 
8804   // Zap masked loads with a zero mask.
8805   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8806     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
8807 
8808   // Try transforming N to an indexed load.
8809   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
8810     return SDValue(N, 0);
8811 
8812   return SDValue();
8813 }
8814 
/// A vector select of 2 constant vectors can be simplified to math/logic to
/// avoid a variable select instruction and possibly avoid constant loads.
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  // Require a one-use i1-element condition, a target that prefers math over
  // select for constants, and build vectors of constants in both arms.
  if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
      !TLI.convertSelectOfConstantsToMath(VT) ||
      !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
      !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
    return SDValue();

  // Check if we can use the condition value to increment/decrement a single
  // constant value. This simplifies a select to an add and removes a constant
  // load/materialization from the general case.
  bool AllAddOne = true;
  bool AllSubOne = true;
  unsigned Elts = VT.getVectorNumElements();
  for (unsigned i = 0; i != Elts; ++i) {
    SDValue N1Elt = N1.getOperand(i);
    SDValue N2Elt = N2.getOperand(i);
    // Undef lanes are compatible with either relationship, so skip them.
    if (N1Elt.isUndef() || N2Elt.isUndef())
      continue;

    // Note: APInt +/- wraps, so e.g. INT_MIN == INT_MAX + 1 counts as AddOne.
    const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
    const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
    if (C1 != C2 + 1)
      AllAddOne = false;
    if (C1 != C2 - 1)
      AllSubOne = false;
  }

  // Further simplifications for the extra-special cases where the constants are
  // all 0 or all -1 should be implemented as folds of these patterns.
  SDLoc DL(N);
  if (AllAddOne || AllSubOne) {
    // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
    // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
    auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
    SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
    return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
  }

  // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
  APInt Pow2C;
  if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
      isNullOrNullSplat(N2)) {
    SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
    SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
    return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
  }

  // Last resort: smear a sign-bit-test condition into a mask.
  if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
    return V;

  // The general case for select-of-constants:
  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
  // leave that to a machine-specific pass.
  return SDValue();
}
8877 
8878 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
8879   SDValue N0 = N->getOperand(0);
8880   SDValue N1 = N->getOperand(1);
8881   SDValue N2 = N->getOperand(2);
8882   EVT VT = N->getValueType(0);
8883   SDLoc DL(N);
8884 
8885   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8886     return V;
8887 
8888   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
8889   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
8890     return DAG.getSelect(DL, VT, F, N2, N1);
8891 
8892   // Canonicalize integer abs.
8893   // vselect (setg[te] X,  0),  X, -X ->
8894   // vselect (setgt    X, -1),  X, -X ->
8895   // vselect (setl[te] X,  0), -X,  X ->
8896   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
8897   if (N0.getOpcode() == ISD::SETCC) {
8898     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
8899     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8900     bool isAbs = false;
8901     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
8902 
8903     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
8904          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
8905         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
8906       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
8907     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
8908              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
8909       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8910 
8911     if (isAbs) {
8912       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
8913         return DAG.getNode(ISD::ABS, DL, VT, LHS);
8914 
8915       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
8916                                   DAG.getConstant(VT.getScalarSizeInBits() - 1,
8917                                                   DL, getShiftAmountTy(VT)));
8918       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
8919       AddToWorklist(Shift.getNode());
8920       AddToWorklist(Add.getNode());
8921       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
8922     }
8923 
8924     // vselect x, y (fcmp lt x, y) -> fminnum x, y
8925     // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
8926     //
8927     // This is OK if we don't care about what happens if either operand is a
8928     // NaN.
8929     //
8930     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
8931       if (SDValue FMinMax =
8932               combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
8933         return FMinMax;
8934     }
8935 
8936     // If this select has a condition (setcc) with narrower operands than the
8937     // select, try to widen the compare to match the select width.
8938     // TODO: This should be extended to handle any constant.
8939     // TODO: This could be extended to handle non-loading patterns, but that
8940     //       requires thorough testing to avoid regressions.
8941     if (isNullOrNullSplat(RHS)) {
8942       EVT NarrowVT = LHS.getValueType();
8943       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
8944       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
8945       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
8946       unsigned WideWidth = WideVT.getScalarSizeInBits();
8947       bool IsSigned = isSignedIntSetCC(CC);
8948       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8949       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
8950           SetCCWidth != 1 && SetCCWidth < WideWidth &&
8951           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
8952           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
8953         // Both compare operands can be widened for free. The LHS can use an
8954         // extended load, and the RHS is a constant:
8955         //   vselect (ext (setcc load(X), C)), N1, N2 -->
8956         //   vselect (setcc extload(X), C'), N1, N2
8957         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8958         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
8959         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
8960         EVT WideSetCCVT = getSetCCResultType(WideVT);
8961         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
8962         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
8963       }
8964     }
8965   }
8966 
8967   if (SimplifySelectOps(N, N1, N2))
8968     return SDValue(N, 0);  // Don't revisit N.
8969 
8970   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
8971   if (ISD::isBuildVectorAllOnes(N0.getNode()))
8972     return N1;
8973   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
8974   if (ISD::isBuildVectorAllZeros(N0.getNode()))
8975     return N2;
8976 
8977   // The ConvertSelectToConcatVector function is assuming both the above
8978   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
8979   // and addressed.
8980   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
8981       N2.getOpcode() == ISD::CONCAT_VECTORS &&
8982       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
8983     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
8984       return CV;
8985   }
8986 
8987   if (SDValue V = foldVSelectOfConstants(N))
8988     return V;
8989 
8990   return SDValue();
8991 }
8992 
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0); // Compare LHS.
  SDValue N1 = N->getOperand(1); // Compare RHS.
  SDValue N2 = N->getOperand(2); // Value selected when the compare is true.
  SDValue N3 = N->getOperand(3); // Value selected when the compare is false.
  SDValue N4 = N->getOperand(4); // Condition code.
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
                                  CC, SDLoc(N), false)) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;    // cond always true -> true val
      else
        return N3;    // cond always false -> false val
    } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // coherent with DAG creation: no setcc node is created in this case.
      return N2;
    } else if (SCC.getOpcode() == ISD::SETCC) {
      // The compare simplified to another setcc; fold to a simpler select_cc
      // built from the simplified compare, preserving the setcc's flags.
      SDValue SelectOp = DAG.getNode(
          ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
          SCC.getOperand(1), N2, N3, SCC.getOperand(2));
      SelectOp->setFlags(SCC->getFlags());
      return SelectOp;
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}
9036 
9037 SDValue DAGCombiner::visitSETCC(SDNode *N) {
9038   // setcc is very commonly used as an argument to brcond. This pattern
9039   // also lend itself to numerous combines and, as a result, it is desired
9040   // we keep the argument to a brcond as a setcc as much as possible.
9041   bool PreferSetCC =
9042       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
9043 
9044   SDValue Combined = SimplifySetCC(
9045       N->getValueType(0), N->getOperand(0), N->getOperand(1),
9046       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
9047 
9048   if (!Combined)
9049     return SDValue();
9050 
9051   // If we prefer to have a setcc, and we don't, we'll try our best to
9052   // recreate one using rebuildSetCC.
9053   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
9054     SDValue NewSetCC = rebuildSetCC(Combined);
9055 
9056     // We don't have anything interesting to combine to.
9057     if (NewSetCC.getNode() == N)
9058       return SDValue();
9059 
9060     if (NewSetCC)
9061       return NewSetCC;
9062   }
9063 
9064   return Combined;
9065 }
9066 
9067 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
9068   SDValue LHS = N->getOperand(0);
9069   SDValue RHS = N->getOperand(1);
9070   SDValue Carry = N->getOperand(2);
9071   SDValue Cond = N->getOperand(3);
9072 
9073   // If Carry is false, fold to a regular SETCC.
9074   if (isNullConstant(Carry))
9075     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
9076 
9077   return SDValue();
9078 }
9079 
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
         Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
         Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
         && "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, DL, VT, N0);

  // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
  // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
  // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
  if (N0->getOpcode() == ISD::SELECT) {
    SDValue Op1 = N0->getOperand(1);
    SDValue Op2 = N0->getOperand(2);
    // For zext, only fold when the extension is not free; otherwise keep the
    // narrow select as-is.
    if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
        (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
      // For any_extend, choose sign extension of the constants to allow a
      // possible further transform to sign_extend_inreg, i.e.
      //
      // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
      // t2: i64 = any_extend t1
      // -->
      // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
      // -->
      // t4: i64 = sign_extend_inreg t3
      unsigned FoldOpc = Opcode;
      if (FoldOpc == ISD::ANY_EXTEND)
        FoldOpc = ISD::SIGN_EXTEND;
      return DAG.getSelect(DL, VT, N0->getOperand(0),
                           DAG.getNode(FoldOpc, DL, VT, Op1),
                           DAG.getNode(FoldOpc, DL, VT, Op2));
    }
  }

  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
  // Requires a vector result, a legal scalar type (once types are legal), and
  // an all-constant build_vector operand.
  EVT SVT = VT.getScalarType();
  if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    return SDValue();

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
  SmallVector<SDValue, 8> Elts;
  unsigned NumElts = VT.getVectorNumElements();

  // For zero-extensions, UNDEF elements still guarantee to have the upper
  // bits set to zero.
  bool IsZext =
      Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;

  for (unsigned i = 0; i != NumElts; ++i) {
    SDValue Op = N0.getOperand(i);
    if (Op.isUndef()) {
      Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
      continue;
    }

    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getBuildVector(VT, DL, Elts);
}
9168 
9169 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
9170 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
9171 // transformation. Returns true if extension are possible and the above
9172 // mentioned transformation is profitable.
9173 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
9174                                     unsigned ExtOpc,
9175                                     SmallVectorImpl<SDNode *> &ExtendNodes,
9176                                     const TargetLowering &TLI) {
9177   bool HasCopyToRegUses = false;
9178   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
9179   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
9180                             UE = N0.getNode()->use_end();
9181        UI != UE; ++UI) {
9182     SDNode *User = *UI;
9183     if (User == N)
9184       continue;
9185     if (UI.getUse().getResNo() != N0.getResNo())
9186       continue;
9187     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
9188     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
9189       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
9190       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
9191         // Sign bits will be lost after a zext.
9192         return false;
9193       bool Add = false;
9194       for (unsigned i = 0; i != 2; ++i) {
9195         SDValue UseOp = User->getOperand(i);
9196         if (UseOp == N0)
9197           continue;
9198         if (!isa<ConstantSDNode>(UseOp))
9199           return false;
9200         Add = true;
9201       }
9202       if (Add)
9203         ExtendNodes.push_back(User);
9204       continue;
9205     }
9206     // If truncates aren't free and there are users we can't
9207     // extend, it isn't worthwhile.
9208     if (!isTruncFree)
9209       return false;
9210     // Remember if this value is live-out.
9211     if (User->getOpcode() == ISD::CopyToReg)
9212       HasCopyToRegUses = true;
9213   }
9214 
9215   if (HasCopyToRegUses) {
9216     bool BothLiveOut = false;
9217     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
9218          UI != UE; ++UI) {
9219       SDUse &Use = UI.getUse();
9220       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
9221         BothLiveOut = true;
9222         break;
9223       }
9224     }
9225     if (BothLiveOut)
9226       // Both unextended and extended values are live out. There had better be
9227       // a good reason for the transformation.
9228       return ExtendNodes.size();
9229   }
9230   return true;
9231 }
9232 
9233 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
9234                                   SDValue OrigLoad, SDValue ExtLoad,
9235                                   ISD::NodeType ExtType) {
9236   // Extend SetCC uses if necessary.
9237   SDLoc DL(ExtLoad);
9238   for (SDNode *SetCC : SetCCs) {
9239     SmallVector<SDValue, 4> Ops;
9240 
9241     for (unsigned j = 0; j != 2; ++j) {
9242       SDValue SOp = SetCC->getOperand(j);
9243       if (SOp == OrigLoad)
9244         Ops.push_back(ExtLoad);
9245       else
9246         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
9247     }
9248 
9249     Ops.push_back(SetCC->getOperand(2));
9250     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
9251   }
9252 }
9253 
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Require a simple, unindexed, non-extending, single-use load, a
  // power-of-2 vector result, and a target that wants the vector extload.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || !LN0->isSimple() ||
      !DstVT.isVector() || !DstVT.isPow2VectorType() ||
      !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // All other users of the load must be extendable; SetCCs collects the
  // compares that ExtendSetCCUses will rewrite afterwards.
  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types: halve the
  // source and destination types in lockstep until the extload is legal.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  // Stride is the in-memory size of one split chunk.
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  // Emit one extending load per chunk, advancing the pointer by Stride and
  // conservatively reducing the alignment for the offset chunks.
  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL);

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  // Merge the chains of the partial loads and concatenate their values.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
9352 
// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
  EVT VT = N->getValueType(0);
  EVT OrigVT = N->getOperand(0).getValueType();
  // If the zext itself is free there is nothing to gain from widening.
  if (TLI.isZExtFree(OrigVT, VT))
    return SDValue();

  // and/or/xor
  // The logic op must have a constant RHS and be legal in the wide type (if
  // we are past operation legalization).
  SDValue N0 = N->getOperand(0);
  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
        N0.getOpcode() == ISD::XOR) ||
      N0.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    return SDValue();

  // shl/shr
  // Likewise, the shift must have a constant amount and be legal wide.
  SDValue N1 = N0->getOperand(0);
  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
      N1.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    return SDValue();

  // load
  // The shifted value must be a non-sext, non-indexed load whose zextload
  // form is legal for the wide type.
  if (!isa<LoadSDNode>(N1.getOperand(0)))
    return SDValue();
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
  EVT MemVT = Load->getMemoryVT();
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();


  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong.
  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    return SDValue();

  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  // Any other users of the load must be extendable setccs for this to pay
  // off; they are collected for rewriting below.
  SmallVector<SDNode*, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
                               ISD::ZERO_EXTEND, SetCCs, TLI))
    return SDValue();

  // Actually do the transformation.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
                                   Load->getChain(), Load->getBasePtr(),
                                   Load->getMemoryVT(), Load->getMemOperand());

  // Rebuild the shift on top of the widened load.
  SDLoc DL1(N1);
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
                              N1.getOperand(1));

  // Re-materialize the logic-op constant zero-extended to the wide type.
  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
  Mask = Mask.zext(VT.getSizeInBits());
  SDLoc DL0(N0);
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
                            DAG.getConstant(Mask, DL0, VT));

  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
  CombineTo(N, And);
  // If the extend was the load's only remaining user just rewire the chain;
  // otherwise other users keep the narrow value via a truncate.
  if (SDValue(Load, 0).hasOneUse()) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
  } else {
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
                                Load->getValueType(0), ExtLoad);
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
  }

  // N0 is dead at this point.
  recursivelyDeleteUnusedNodes(N0.getNode());

  return SDValue(N,0); // Return N so it doesn't get rechecked!
}
9430 
9431 /// If we're narrowing or widening the result of a vector select and the final
9432 /// size is the same size as a setcc (compare) feeding the select, then try to
9433 /// apply the cast operation to the select's operands because matching vector
9434 /// sizes for a select condition and other operands should be more efficient.
9435 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
9436   unsigned CastOpcode = Cast->getOpcode();
9437   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
9438           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
9439           CastOpcode == ISD::FP_ROUND) &&
9440          "Unexpected opcode for vector select narrowing/widening");
9441 
9442   // We only do this transform before legal ops because the pattern may be
9443   // obfuscated by target-specific operations after legalization. Do not create
9444   // an illegal select op, however, because that may be difficult to lower.
9445   EVT VT = Cast->getValueType(0);
9446   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
9447     return SDValue();
9448 
9449   SDValue VSel = Cast->getOperand(0);
9450   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
9451       VSel.getOperand(0).getOpcode() != ISD::SETCC)
9452     return SDValue();
9453 
9454   // Does the setcc have the same vector size as the casted select?
9455   SDValue SetCC = VSel.getOperand(0);
9456   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
9457   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
9458     return SDValue();
9459 
9460   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
9461   SDValue A = VSel.getOperand(1);
9462   SDValue B = VSel.getOperand(2);
9463   SDValue CastA, CastB;
9464   SDLoc DL(Cast);
9465   if (CastOpcode == ISD::FP_ROUND) {
9466     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
9467     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
9468     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
9469   } else {
9470     CastA = DAG.getNode(CastOpcode, DL, VT, A);
9471     CastB = DAG.getNode(CastOpcode, DL, VT, B);
9472   }
9473   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
9474 }
9475 
// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
                                     const TargetLowering &TLI, EVT VT,
                                     bool LegalOperations, SDNode *N,
                                     SDValue N0, ISD::LoadExtType ExtLoadType) {
  SDNode *N0Node = N0.getNode();
  // The operand must be an extending load of the matching kind (sext for
  // SEXTLOAD, zext for ZEXTLOAD) or an any-extend load, must be unindexed,
  // and must have no other users.
  bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
                                                   : ISD::isZEXTLoad(N0Node);
  if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
      !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  EVT MemVT = LN0->getMemoryVT();
  // The wider extload must be legal, except before legalization when the
  // load is simple and the result type is scalar.
  if ((LegalOperations || !LN0->isSimple() ||
       VT.isVector()) &&
      !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
    return SDValue();

  SDValue ExtLoad =
      DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                     LN0->getBasePtr(), MemVT, LN0->getMemOperand());
  // Replace the extend with the widened load, rewire chain users of the old
  // load to the new load's chain, and delete the old load if it is now dead.
  Combiner.CombineTo(N, ExtLoad);
  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
  if (LN0->use_empty())
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
9505 
// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// Only generate vector extloads when 1) they're legal, and 2) they are
// deemed desirable by the target.
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
                                  const TargetLowering &TLI, EVT VT,
                                  bool LegalOperations, SDNode *N, SDValue N0,
                                  ISD::LoadExtType ExtLoadType,
                                  ISD::NodeType ExtOpc) {
  // Require an unindexed, non-extending load. The extload must be legal,
  // except before legalization for simple scalar loads.
  if (!ISD::isNON_EXTLoad(N0.getNode()) ||
      !ISD::isUNINDEXEDLoad(N0.getNode()) ||
      ((LegalOperations || VT.isVector() ||
        !cast<LoadSDNode>(N0)->isSimple()) &&
       !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
    return {};

  bool DoXform = true;
  SmallVector<SDNode *, 4> SetCCs;
  // If the load has other users, they must all be extendable (setccs that
  // will be rewritten below) for the transformation to be worthwhile.
  if (!N0.hasOneUse())
    DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
  if (VT.isVector())
    DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
  if (!DoXform)
    return {};

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                                   LN0->getBasePtr(), N0.getValueType(),
                                   LN0->getMemOperand());
  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
  // If the load value is used only by N, replace it via CombineTo N.
  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
  Combiner.CombineTo(N, ExtLoad);
  if (NoReplaceTrunc) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  } else {
    // Other users keep seeing the narrow value via a truncate of the
    // extended load.
    SDValue Trunc =
        DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
    Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
9548 
// fold ([s|z]ext (masked_load x)) -> (extending masked_load x), when the
// target supports the extending masked load and deems it desirable.
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
                                        const TargetLowering &TLI, EVT VT,
                                        SDNode *N, SDValue N0,
                                        ISD::LoadExtType ExtLoadType,
                                        ISD::NodeType ExtOpc) {
  if (!N0.hasOneUse())
    return SDValue();

  // Only non-extending masked loads qualify.
  MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
  if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
    return SDValue();

  if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
    return SDValue();

  if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // Build a new masked load with the wider result type; the pass-through
  // value must be extended to match.
  SDLoc dl(Ld);
  SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
  SDValue NewLoad = DAG.getMaskedLoad(
      VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
      PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
      ExtLoadType, Ld->isExpandingLoad());
  // Rewire chain users of the old load to the new load's chain.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
  return NewLoad;
}
9576 
9577 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
9578                                        bool LegalOperations) {
9579   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9580           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
9581 
9582   SDValue SetCC = N->getOperand(0);
9583   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
9584       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
9585     return SDValue();
9586 
9587   SDValue X = SetCC.getOperand(0);
9588   SDValue Ones = SetCC.getOperand(1);
9589   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
9590   EVT VT = N->getValueType(0);
9591   EVT XVT = X.getValueType();
9592   // setge X, C is canonicalized to setgt, so we do not need to match that
9593   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
9594   // not require the 'not' op.
9595   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
9596     // Invert and smear/shift the sign bit:
9597     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
9598     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
9599     SDLoc DL(N);
9600     unsigned ShCt = VT.getSizeInBits() - 1;
9601     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9602     if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
9603       SDValue NotX = DAG.getNOT(DL, X, VT);
9604       SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
9605       auto ShiftOpcode =
9606         N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
9607       return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
9608     }
9609   }
9610   return SDValue();
9611 }
9612 
/// Visit a SIGN_EXTEND node and try to replace it with something simpler.
/// Returns the replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Fold extensions of constant operands first.
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getScalarValueSizeInBits();
    unsigned MidBits  = N0.getScalarValueSizeInBits();
    unsigned DestBits = VT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      // First bring Op to the destination width, then sign-extend in register
      // from the truncate's (narrower) type.
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // Try to simplify (sext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::SEXTLOAD, ISD::SIGN_EXTEND))
    return foldedExt;

  if (SDValue foldedExt =
      tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
                               ISD::SIGN_EXTEND))
    return foldedExt;

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // Try to simplify (sext (sextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
    return foldedExt;

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  // NOTE(review): the '!LegalOperations &&' clause below restricts this fold
  // to pre-legalization only; confirm '&&' (rather than '||') is intended.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
      LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
      SmallVector<SDNode*, 4> SetCCs;
      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                             ISD::SIGN_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        // Sign-extend the logic-op constant to the destination width.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
        // Capture use-counts before CombineTo mutates the graph.
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          // Other users of the load remain; give them a truncate of the
          // extending load instead.
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N0.getOperand(0).getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // If we already have the desired type, don't change it.
      if (SVT != N0.getValueType()) {
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        if (VT.getSizeInBits() == SVT.getSizeInBits())
          return DAG.getSetCC(DL, VT, N00, N01, CC);

        // If the desired elements are smaller or larger than the source
        // elements, we can use a matching integer vector type and then
        // truncate/sign extend.
        EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
        if (SVT == MatchingVecType) {
          SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
          return DAG.getSExtOrTrunc(VsetCC, DL, VT);
        }
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1)
                             ? DAG.getAllOnesConstant(DL, VT)
                             : DAG.getBoolConstant(true, DL, VT, N00VT);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
      if (SetCCVT.getScalarSizeInBits() != 1 &&
          (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Eliminate this sign extend by doing a negation in the destination type:
  // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
      isNullOrNullSplat(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
      TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
  }
  // Eliminate this sign extend by doing a decrement in the destination type:
  // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
      isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
    return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
  }

  return SDValue();
}
9845 
9846 // isTruncateOf - If N is a truncate of some other value, return true, record
9847 // the value being truncated in Op and which of Op's bits are zero/one in Known.
9848 // This function computes KnownBits to avoid a duplicated call to
9849 // computeKnownBits in the caller.
9850 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
9851                          KnownBits &Known) {
9852   if (N->getOpcode() == ISD::TRUNCATE) {
9853     Op = N->getOperand(0);
9854     Known = DAG.computeKnownBits(Op);
9855     return true;
9856   }
9857 
9858   if (N.getOpcode() != ISD::SETCC ||
9859       N.getValueType().getScalarType() != MVT::i1 ||
9860       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
9861     return false;
9862 
9863   SDValue Op0 = N->getOperand(0);
9864   SDValue Op1 = N->getOperand(1);
9865   assert(Op0.getValueType() == Op1.getValueType());
9866 
9867   if (isNullOrNullSplat(Op0))
9868     Op = Op1;
9869   else if (isNullOrNullSplat(Op1))
9870     Op = Op0;
9871   else
9872     return false;
9873 
9874   Known = DAG.computeKnownBits(Op);
9875 
9876   return (Known.Zero | 1).isAllOnesValue();
9877 }
9878 
9879 /// Given an extending node with a pop-count operand, if the target does not
9880 /// support a pop-count in the narrow source type but does support it in the
9881 /// destination type, widen the pop-count to the destination type.
9882 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
9883   assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
9884           Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
9885 
9886   SDValue CtPop = Extend->getOperand(0);
9887   if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
9888     return SDValue();
9889 
9890   EVT VT = Extend->getValueType(0);
9891   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9892   if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
9893       !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
9894     return SDValue();
9895 
9896   // zext (ctpop X) --> ctpop (zext X)
9897   SDLoc DL(Extend);
9898   SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
9899   return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
9900 }
9901 
/// Visit a ZERO_EXTEND node and try to replace it with something simpler.
/// Returns the replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Fold extensions of constant operands first.
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  SDValue Op;
  KnownBits Known;
  if (isTruncateOf(DAG, N0, Op, Known)) {
    // TruncatedBits is the set of bits of Op that the truncate discards
    // (empty when Op is already the truncate result's width); the fold is
    // valid only if all those bits are known zero.
    APInt TruncatedBits =
      (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
      APInt(Op.getScalarValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getScalarValueSizeInBits(),
                        N0.getScalarValueSizeInBits(),
                        std::min(Op.getScalarValueSizeInBits(),
                                 VT.getScalarSizeInBits()));
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger mask,
    // possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
        AddToWorklist(Op.getNode());
        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
        // Transfer the debug info; the new node is equivalent to N0.
        DAG.transferDbgValues(N0, ZExtOrTrunc);
        return ZExtOrTrunc;
      }
    }

    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
      // We may safely transfer the debug info describing the truncate node over
      // to the equivalent and operation.
      DAG.transferDbgValues(N0, And);
      return And;
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    // Zero-extend the mask constant to the destination width.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // Try to simplify (zext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
    return foldedExt;

  if (SDValue foldedExt =
      tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
                               ISD::ZERO_EXTEND))
    return foldedExt;

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  // NOTE(review): the '!LegalOperations &&' clause below restricts this fold
  // to pre-legalization only; confirm '&&' (rather than '||') is intended.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        // Skip the transform if (and (load x) cst) can already be matched as
        // a zextload by isAndLoadExtLoad and has other users.
        if (N0.getOpcode() == ISD::AND) {
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT;
          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
            DoXform = false;
        }
      }
      if (DoXform)
        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        // Zero-extend the logic-op constant to the destination width.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
        // Capture use-counts before CombineTo mutates the graph.
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          // Other users of the load remain; give them a truncate of the
          // extending load instead.
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    return ZExtLoad;

  // Try to simplify (zext (zextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
    return foldedExt;

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the sext'd result matches the element size of
      // the compare operands.
      SDLoc DL(N);
      SDValue VecOnes = DAG.getConstant(1, DL, VT);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
                         VecOnes);
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  if (SDValue NewCtPop = widenCtPop(N, DAG))
    return NewCtPop;

  return SDValue();
}
10157 
/// Visit an ANY_EXTEND node and try to replace it with something simpler.
/// Returns the replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Fold extensions of constant operands first.
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    // Zero-extend the mask constant to the destination width.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
                                        TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
        recursivelyDeleteUnusedNodes(LN0);
      } else {
        // Other users of the load remain; give them a truncate of the
        // extending load instead.
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                    N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      // Re-issue the existing extending load directly at the wider type,
      // preserving its extension kind.
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      recursivelyDeleteUnusedNodes(LN0);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
        DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1),
                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  if (SDValue NewCtPop = widenCtPop(N, DAG))
    return NewCtPop;

  return SDValue();
}
10306 
10307 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
10308   unsigned Opcode = N->getOpcode();
10309   SDValue N0 = N->getOperand(0);
10310   SDValue N1 = N->getOperand(1);
10311   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
10312 
10313   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
10314   if (N0.getOpcode() == Opcode &&
10315       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
10316     return N0;
10317 
10318   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10319       N0.getOperand(0).getOpcode() == Opcode) {
10320     // We have an assert, truncate, assert sandwich. Make one stronger assert
10321     // by asserting on the smallest asserted type to the larger source type.
10322     // This eliminates the later assert:
10323     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
10324     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
10325     SDValue BigA = N0.getOperand(0);
10326     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10327     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10328            "Asserting zero/sign-extended bits to a type larger than the "
10329            "truncated destination does not provide information");
10330 
10331     SDLoc DL(N);
10332     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
10333     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
10334     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10335                                     BigA.getOperand(0), MinAssertVTVal);
10336     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10337   }
10338 
10339   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
10340   // than X. Just move the AssertZext in front of the truncate and drop the
10341   // AssertSExt.
10342   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10343       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
10344       Opcode == ISD::AssertZext) {
10345     SDValue BigA = N0.getOperand(0);
10346     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10347     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10348            "Asserting zero/sign-extended bits to a type larger than the "
10349            "truncated destination does not provide information");
10350 
10351     if (AssertVT.bitsLT(BigA_AssertVT)) {
10352       SDLoc DL(N);
10353       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10354                                       BigA.getOperand(0), N1);
10355       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10356     }
10357   }
10358 
10359   return SDValue();
10360 }
10361 
/// If the result of a wider load is shifted to right of N  bits and then
/// truncated to a narrower type and where N is a multiple of number of bits of
/// the narrower type, transform it to a narrower load from address + N / num of
/// bits of new type. Also narrow the load if the result is masked with an AND
/// to effectively produce a smaller type. If the result is to be extended, also
/// fold the extension to form a extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  // ExtType describes how the narrow loaded value is re-extended to VT.
  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // ExtVT is the narrow memory type we will actually load.
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // ShAmt: bit offset into the wide value where the narrow value starts.
  // HasShiftedOffset: set when that offset came from a shifted AND mask, in
  // which case the result must be shifted back left at the end.
  unsigned ShAmt = 0;
  bool HasShiftedOffset = false;
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value,
    // or it maybe shifting a higher subword, half or byte into the lowest
    // bits.
    ExtType = ISD::ZEXTLOAD;
    // Treat N itself as the shift; the generic SRL handling below consumes it.
    N0 = SDValue(N, 0);

    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01 || !LN0)
      return SDValue();

    uint64_t ShiftAmt = N01->getZExtValue();
    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
    // For a non-sext load with in-range shift, we can load the remaining
    // (MemoryWidth - ShiftAmt) bits; otherwise fall back to the bits of VT
    // that survive the shift.
    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    else
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getSizeInBits() - ShiftAmt);
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC)
      return SDValue();

    const APInt &Mask = AndC->getAPIntValue();
    unsigned ActiveBits = 0;
    if (Mask.isMask()) {
      // Low-bits mask: just a zero-extended narrow value.
      ActiveBits = Mask.countTrailingOnes();
    } else if (Mask.isShiftedMask()) {
      // Shifted mask: the narrow value lives at a bit offset; remember it so
      // we can shift the loaded data back into position afterwards.
      ShAmt = Mask.countTrailingZeros();
      APInt ShiftedMask = Mask.lshr(ShAmt);
      ActiveBits = ShiftedMask.countTrailingOnes();
      HasShiftedOffset = true;
    } else
      return SDValue();

    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  // Look through an SRL feeding the node (or the SRL we started from): the
  // shift amount becomes a byte offset for the narrowed load.
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    SDValue SRL = N0;
    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
      ShAmt = ConstShift->getZExtValue();
      unsigned EVTBits = ExtVT.getSizeInBits();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      auto *LN0 = cast<LoadSDNode>(N0);

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (LN0->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
        return SDValue();

      // If the SRL is only used by a masking AND, we may be able to adjust
      // the ExtVT to make the AND redundant.
      SDNode *Mask = *(SRL->use_begin());
      if (Mask->getOpcode() == ISD::AND &&
          isa<ConstantSDNode>(Mask->getOperand(1))) {
        const APInt &ShiftMask =
          cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
        if (ShiftMask.isMask()) {
          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
                                           ShiftMask.countTrailingOnes());
          // If the mask is smaller, recompute the type.
          if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
              TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
            ExtVT = MaskedVT;
        }
      }
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  // Reducing the width of a volatile load is illegal.  For atomics, we may be
  // able to reduce the width provided we never widen again. (see D66309)
  if (!LN0->isSimple() ||
      !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  // On big-endian targets the sub-value sits at the opposite end of the
  // stored bytes, so mirror the shift within the store width.
  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    return LVTStoreBits - EVTStoreBits - ShAmt;
  };

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian())
    ShAmt = AdjustBigEndianShift(ShAmt);

  // Convert the bit offset to a byte offset and derive the (possibly reduced)
  // alignment of the narrowed access.
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr =
      DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags);
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  if (HasShiftedOffset) {
    // Recalculate the shift amount after it has been altered to calculate
    // the offset.
    if (DAG.getDataLayout().isBigEndian())
      ShAmt = AdjustBigEndianShift(ShAmt);

    // We're using a shifted mask, so the load now has an offset. This means
    // that data has been loaded into the lower bytes than it would have been
    // before, so we need to shl the loaded data into the correct position in the
    // register.
    SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
    Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
  }

  // Return the new loaded value.
  return Result;
}
10570 
10571 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
10572   SDValue N0 = N->getOperand(0);
10573   SDValue N1 = N->getOperand(1);
10574   EVT VT = N->getValueType(0);
10575   EVT EVT = cast<VTSDNode>(N1)->getVT();
10576   unsigned VTBits = VT.getScalarSizeInBits();
10577   unsigned EVTBits = EVT.getScalarSizeInBits();
10578 
10579   if (N0.isUndef())
10580     return DAG.getUNDEF(VT);
10581 
10582   // fold (sext_in_reg c1) -> c1
10583   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10584     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
10585 
10586   // If the input is already sign extended, just drop the extension.
10587   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
10588     return N0;
10589 
10590   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
10591   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
10592       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
10593     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10594                        N0.getOperand(0), N1);
10595 
10596   // fold (sext_in_reg (sext x)) -> (sext x)
10597   // fold (sext_in_reg (aext x)) -> (sext x)
10598   // if x is small enough or if we know that x has more than 1 sign bit and the
10599   // sign_extend_inreg is extending from one of them.
10600   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
10601     SDValue N00 = N0.getOperand(0);
10602     unsigned N00Bits = N00.getScalarValueSizeInBits();
10603     if ((N00Bits <= EVTBits ||
10604          (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
10605         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
10606       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
10607   }
10608 
10609   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
10610   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
10611        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
10612        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
10613       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
10614     if (!LegalOperations ||
10615         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
10616       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
10617                          N0.getOperand(0));
10618   }
10619 
10620   // fold (sext_in_reg (zext x)) -> (sext x)
10621   // iff we are extending the source sign bit.
10622   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
10623     SDValue N00 = N0.getOperand(0);
10624     if (N00.getScalarValueSizeInBits() == EVTBits &&
10625         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
10626       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
10627   }
10628 
10629   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
10630   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
10631     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
10632 
10633   // fold operands of sext_in_reg based on knowledge that the top bits are not
10634   // demanded.
10635   if (SimplifyDemandedBits(SDValue(N, 0)))
10636     return SDValue(N, 0);
10637 
10638   // fold (sext_in_reg (load x)) -> (smaller sextload x)
10639   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
10640   if (SDValue NarrowLoad = ReduceLoadWidth(N))
10641     return NarrowLoad;
10642 
10643   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
10644   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
10645   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
10646   if (N0.getOpcode() == ISD::SRL) {
10647     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
10648       if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
10649         // We can turn this into an SRA iff the input to the SRL is already sign
10650         // extended enough.
10651         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
10652         if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
10653           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
10654                              N0.getOperand(1));
10655       }
10656   }
10657 
10658   // fold (sext_inreg (extload x)) -> (sextload x)
10659   // If sextload is not supported by target, we can only do the combine when
10660   // load has one use. Doing otherwise can block folding the extload with other
10661   // extends that the target does support.
10662   if (ISD::isEXTLoad(N0.getNode()) &&
10663       ISD::isUNINDEXEDLoad(N0.getNode()) &&
10664       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10665       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
10666         N0.hasOneUse()) ||
10667        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10668     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10669     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10670                                      LN0->getChain(),
10671                                      LN0->getBasePtr(), EVT,
10672                                      LN0->getMemOperand());
10673     CombineTo(N, ExtLoad);
10674     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10675     AddToWorklist(ExtLoad.getNode());
10676     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10677   }
10678   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
10679   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
10680       N0.hasOneUse() &&
10681       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10682       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
10683        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10684     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10685     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10686                                      LN0->getChain(),
10687                                      LN0->getBasePtr(), EVT,
10688                                      LN0->getMemOperand());
10689     CombineTo(N, ExtLoad);
10690     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10691     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10692   }
10693 
10694   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
10695   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
10696     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
10697                                            N0.getOperand(1), false))
10698       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10699                          BSwap, N1);
10700   }
10701 
10702   return SDValue();
10703 }
10704 
10705 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
10706   SDValue N0 = N->getOperand(0);
10707   EVT VT = N->getValueType(0);
10708 
10709   if (N0.isUndef())
10710     return DAG.getUNDEF(VT);
10711 
10712   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10713     return Res;
10714 
10715   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10716     return SDValue(N, 0);
10717 
10718   return SDValue();
10719 }
10720 
10721 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
10722   SDValue N0 = N->getOperand(0);
10723   EVT VT = N->getValueType(0);
10724 
10725   if (N0.isUndef())
10726     return DAG.getUNDEF(VT);
10727 
10728   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10729     return Res;
10730 
10731   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10732     return SDValue(N, 0);
10733 
10734   return SDValue();
10735 }
10736 
// Combine TRUNCATE nodes: eliminate no-op/redundant truncates, push the
// truncate toward the sources (extends, selects, shifts, build_vectors,
// loads, concat_vectors, bitcasts, adde/addcarry, binops), and narrow loads
// via ReduceLoadWidth. The fold ordering below is deliberate.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (SrcVT == VT)
    return N0;

  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));

  // fold (truncate c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
    // Only return if constant folding actually produced a new node.
    if (C.getNode() != N)
      return C;
  }

  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, than we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Reinterpret the source vector with narrower elements of the result type.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      // Big-endian targets keep the truncated value in the high sub-element.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getVectorIdxConstant(Index, DL));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known = DAG.computeKnownBits(Amt);
    unsigned Size = VT.getScalarSizeInBits();
    // Only safe when the shift amount is provably less than the narrow width.
    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
      SDLoc SL(N);
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
        AddToWorklist(Amt.getNode());
      }
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    }
  }

  // Attempt to pre-truncate BUILD_VECTOR sources.
  if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
    SDLoc DL(N);
    EVT SVT = VT.getScalarType();
    SmallVector<SDValue, 8> TruncOps;
    for (const SDValue &Op : N0->op_values()) {
      SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
      TruncOps.push_back(TruncOp);
    }
    return DAG.getBuildVector(VT, DL, TruncOps);
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {
    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset'th operand; the truncate discards the rest.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    APInt Mask =
        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (LN0->isSimple() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Keep chain users pointing at the replacement load.
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    // Scan the concat operands, recording the single non-undef one (if any)
    // and the narrowed type for each slot.
    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT VecSrcVT = VecSrc.getValueType();
    if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
      SDLoc SL(N);

      // The low-order element is at the end for big-endian layouts.
      unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
                         DAG.getVectorIdxConstant(Idx, SL));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
  // When the adde's carry is not used.
  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      // We only do for addcarry before legalize operation
      ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
       TLI.isOperationLegal(N0.getOpcode(), VT))) {
    SDLoc SL(N);
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
  }

  // fold (truncate (extract_subvector(ext x))) ->
  //      (extract_subvector x)
  // TODO: This can be generalized to cover cases where the truncate and extract
  // do not fully cancel each other out.
  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::SIGN_EXTEND ||
        N00.getOpcode() == ISD::ZERO_EXTEND ||
        N00.getOpcode() == ISD::ANY_EXTEND) {
      if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
          VT.getVectorElementType())
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
                           N00.getOperand(0), N0.getOperand(1));
    }
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Narrow a suitable binary operation with a non-opaque constant operand by
  // moving it ahead of the truncate. This is limited to pre-legalization
  // because targets may prefer a wider type during later combines and invert
  // this transform.
  switch (N0.getOpcode()) {
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    if (!LegalOperations && N0.hasOneUse() &&
        (isConstantOrConstantVector(N0.getOperand(0), true) ||
         isConstantOrConstantVector(N0.getOperand(1), true))) {
      // TODO: We already restricted this to pre-legalization, but for vectors
      // we are extra cautious to not create an unsupported operation.
      // Target-specific changes are likely needed to avoid regressions here.
      if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
        SDLoc DL(N);
        SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
        SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
        return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
      }
    }
  }

  return SDValue();
}
11047 
11048 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
11049   SDValue Elt = N->getOperand(i);
11050   if (Elt.getOpcode() != ISD::MERGE_VALUES)
11051     return Elt.getNode();
11052   return Elt.getOperand(Elt.getResNo()).getNode();
11053 }
11054 
/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  // Look through MERGE_VALUES (via getBuildPairElt) to the underlying nodes;
  // both elements must turn out to be loads for the combine to apply.
  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

  // A BUILD_PAIR is always having the least significant part in elt 0 and the
  // most significant part in elt 1. So when combining into one large load, we
  // need to consider the endianness.
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LD1, LD2);

  // After the possible swap, LD1 is expected to be the load at the lower
  // address. Both loads must be plain (non-extending) single-use loads in the
  // same address space.
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getStoreSize();
  // LD2 must read the memory immediately following LD1 (LD1's address plus
  // LD1's store size), and neither load may be volatile.
  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));

    // Only form the wide load if it does not require more alignment than the
    // original load provides, and (once operations are legalized) only if a
    // load of VT is legal on the target.
    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                         LD1->getPointerInfo(), Align);
  }

  return SDValue();
}
11088 
11089 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
11090   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
11091   // and Lo parts; on big-endian machines it doesn't.
11092   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
11093 }
11094 
11095 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
11096                                     const TargetLowering &TLI) {
11097   // If this is not a bitcast to an FP type or if the target doesn't have
11098   // IEEE754-compliant FP logic, we're done.
11099   EVT VT = N->getValueType(0);
11100   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
11101     return SDValue();
11102 
11103   // TODO: Handle cases where the integer constant is a different scalar
11104   // bitwidth to the FP.
11105   SDValue N0 = N->getOperand(0);
11106   EVT SourceVT = N0.getValueType();
11107   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
11108     return SDValue();
11109 
11110   unsigned FPOpcode;
11111   APInt SignMask;
11112   switch (N0.getOpcode()) {
11113   case ISD::AND:
11114     FPOpcode = ISD::FABS;
11115     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
11116     break;
11117   case ISD::XOR:
11118     FPOpcode = ISD::FNEG;
11119     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
11120     break;
11121   case ISD::OR:
11122     FPOpcode = ISD::FABS;
11123     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
11124     break;
11125   default:
11126     return SDValue();
11127   }
11128 
11129   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
11130   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
11131   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
11132   //   fneg (fabs X)
11133   SDValue LogicOp0 = N0.getOperand(0);
11134   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
11135   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
11136       LogicOp0.getOpcode() == ISD::BITCAST &&
11137       LogicOp0.getOperand(0).getValueType() == VT) {
11138     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
11139     NumFPLogicOpsConv++;
11140     if (N0.getOpcode() == ISD::OR)
11141       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
11142     return FPOp;
11143   }
11144 
11145   return SDValue();
11146 }
11147 
/// Combine a BITCAST node: fold casts of constants and constant vectors,
/// collapse cast chains, move casts across loads, and rewrite bitcast'd FP
/// sign-bit manipulations (fneg/fabs/fcopysign) as integer logic.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // A bitcast of undef is undef of the new type.
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize types, unless both types are integer and the
  // scalar type is legal. Only do this before legalize ops, since the target
  // maybe depending on the bitcast.
  // First check to see if this is all constant.
  // TODO: Support FP bitcasts after legalize types.
  if (VT.isVector() &&
      (!LegalTypes ||
       (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
        TLI.isTypeLegal(VT.getVectorElementType()))) &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      cast<BuildVectorSDNode>(N0)->isConstant())
    return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
                                             VT.getVectorElementType());

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // a fp -> int or int -> conversion and that the resulting operation will
    // be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT))) {
      SDValue C = DAG.getBitcast(VT, N0);
      // getBitcast may return N itself; only report a combine if it folded.
      if (C.getNode() != N)
        return C;
    }
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      // If the load is volatile, we only want to change the load type if the
      // resulting load is legal. Otherwise we might increase the number of
      // memory accesses. We don't care if the original type was legal or not
      // as we assume software couldn't rely on the number of accesses of an
      // illegal type.
      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
       TLI.isOperationLegal(ISD::LOAD, VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);

    if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
                                    *LN0->getMemOperand())) {
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), LN0->getAlignment(),
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      // Redirect the old load's chain users to the new load's chain.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    // ppc_fp128 is a pair of doubles; only the Hi double's sign bit is
    // flipped/cleared, so the flip mask is built as an i64 BUILD_PAIR.
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      SDValue SignBit = DAG.getConstant(
          APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        // For fabs, flip the sign bit only if it is currently set: AND the
        // Hi element's bits with the sign mask to compute the flip bit.
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      // ppc_fp128 case: compute which doubles need their sign flipped by
      // XORing the two bit patterns, keep only the sign bit of the Hi part,
      // and apply the same flip to both halves via BUILD_PAIR.
      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      // Generic case: take the sign bit from X and the rest from the constant.
      APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    // FIXME: If either input vector is bitcast, try to convert the shuffle to
    // the result type of this bitcast. This would eliminate at least one
    // bitcast. See the transform in InstCombine.
    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    if (!(SV0 && SV1))
      return SDValue();

    // VT has MaskScale times as many (narrower) elements as the shuffle's
    // type, so expand each mask entry into MaskScale consecutive entries.
    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    SDValue LegalShuffle =
        TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
    if (LegalShuffle)
      return LegalShuffle;
  }

  return SDValue();
}
11395 
11396 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
11397   EVT VT = N->getValueType(0);
11398   return CombineConsecutiveLoads(N, VT);
11399 }
11400 
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    // Recurse: this hits the SrcBitSize == DstBitSize path above.
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    // Recurse twice: int->int resize, then same-size int->FP bitcast.
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  SDLoc DL(BV);

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = DAG.getDataLayout().isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      // Accumulate the input elements from most- to least-significant; on
      // little-endian targets that means walking the inputs backwards.
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        // Undef inputs contribute zero bits; only an all-undef group yields
        // an undef output element.
        if (Op.isUndef()) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (const SDValue &Op : BV->op_values()) {
    // An undef input expands to NumOutputsPerInput undef outputs.
    if (Op.isUndef()) {
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(Op)->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    // Emit the pieces least-significant first, shifting the value down
    // after each one.
    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
      OpVal.lshrInPlace(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (DAG.getDataLayout().isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getBuildVector(VT, DL, Ops);
}
11515 
11516 static bool isContractable(SDNode *N) {
11517   SDNodeFlags F = N->getFlags();
11518   return F.hasAllowContract() || F.hasAllowReassociation();
11519 }
11520 
11521 /// Try to perform FMA combining on a given FADD node.
11522 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
11523   SDValue N0 = N->getOperand(0);
11524   SDValue N1 = N->getOperand(1);
11525   EVT VT = N->getValueType(0);
11526   SDLoc SL(N);
11527 
11528   const TargetOptions &Options = DAG.getTarget().Options;
11529 
11530   // Floating-point multiply-add with intermediate rounding.
11531   bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
11532 
11533   // Floating-point multiply-add without intermediate rounding.
11534   bool HasFMA =
11535       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
11536       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11537 
11538   // No valid opcode, do not combine.
11539   if (!HasFMAD && !HasFMA)
11540     return SDValue();
11541 
11542   SDNodeFlags Flags = N->getFlags();
11543   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11544   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11545                               CanFuse || HasFMAD);
11546   // If the addition is not contractable, do not combine.
11547   if (!AllowFusionGlobally && !isContractable(N))
11548     return SDValue();
11549 
11550   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
11551   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11552     return SDValue();
11553 
11554   // Always prefer FMAD to FMA for precision.
11555   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11556   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11557 
11558   // Is the node an FMUL and contractable either due to global flags or
11559   // SDNodeFlags.
11560   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11561     if (N.getOpcode() != ISD::FMUL)
11562       return false;
11563     return AllowFusionGlobally || isContractable(N.getNode());
11564   };
11565   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
11566   // prefer to fold the multiply with fewer uses.
11567   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
11568     if (N0.getNode()->use_size() > N1.getNode()->use_size())
11569       std::swap(N0, N1);
11570   }
11571 
11572   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
11573   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11574     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11575                        N0.getOperand(0), N0.getOperand(1), N1, Flags);
11576   }
11577 
11578   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
11579   // Note: Commutes FADD operands.
11580   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11581     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11582                        N1.getOperand(0), N1.getOperand(1), N0, Flags);
11583   }
11584 
11585   // Look through FP_EXTEND nodes to do more combining.
11586 
11587   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
11588   if (N0.getOpcode() == ISD::FP_EXTEND) {
11589     SDValue N00 = N0.getOperand(0);
11590     if (isContractableFMUL(N00) &&
11591         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11592                             N00.getValueType())) {
11593       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11594                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11595                                      N00.getOperand(0)),
11596                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11597                                      N00.getOperand(1)), N1, Flags);
11598     }
11599   }
11600 
11601   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
11602   // Note: Commutes FADD operands.
11603   if (N1.getOpcode() == ISD::FP_EXTEND) {
11604     SDValue N10 = N1.getOperand(0);
11605     if (isContractableFMUL(N10) &&
11606         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11607                             N10.getValueType())) {
11608       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11609                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11610                                      N10.getOperand(0)),
11611                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11612                                      N10.getOperand(1)), N0, Flags);
11613     }
11614   }
11615 
11616   // More folding opportunities when target permits.
11617   if (Aggressive) {
11618     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
11619     if (CanFuse &&
11620         N0.getOpcode() == PreferredFusedOpcode &&
11621         N0.getOperand(2).getOpcode() == ISD::FMUL &&
11622         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
11623       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11624                          N0.getOperand(0), N0.getOperand(1),
11625                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11626                                      N0.getOperand(2).getOperand(0),
11627                                      N0.getOperand(2).getOperand(1),
11628                                      N1, Flags), Flags);
11629     }
11630 
11631     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
11632     if (CanFuse &&
11633         N1->getOpcode() == PreferredFusedOpcode &&
11634         N1.getOperand(2).getOpcode() == ISD::FMUL &&
11635         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
11636       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11637                          N1.getOperand(0), N1.getOperand(1),
11638                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11639                                      N1.getOperand(2).getOperand(0),
11640                                      N1.getOperand(2).getOperand(1),
11641                                      N0, Flags), Flags);
11642     }
11643 
11644 
11645     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
11646     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
11647     auto FoldFAddFMAFPExtFMul = [&] (
11648       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
11649       SDNodeFlags Flags) {
11650       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
11651                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11652                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
11653                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
11654                                      Z, Flags), Flags);
11655     };
11656     if (N0.getOpcode() == PreferredFusedOpcode) {
11657       SDValue N02 = N0.getOperand(2);
11658       if (N02.getOpcode() == ISD::FP_EXTEND) {
11659         SDValue N020 = N02.getOperand(0);
11660         if (isContractableFMUL(N020) &&
11661             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11662                                 N020.getValueType())) {
11663           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
11664                                       N020.getOperand(0), N020.getOperand(1),
11665                                       N1, Flags);
11666         }
11667       }
11668     }
11669 
11670     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
11671     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
11672     // FIXME: This turns two single-precision and one double-precision
11673     // operation into two double-precision operations, which might not be
11674     // interesting for all targets, especially GPUs.
11675     auto FoldFAddFPExtFMAFMul = [&] (
11676       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
11677       SDNodeFlags Flags) {
11678       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11679                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
11680                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
11681                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11682                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
11683                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
11684                                      Z, Flags), Flags);
11685     };
11686     if (N0.getOpcode() == ISD::FP_EXTEND) {
11687       SDValue N00 = N0.getOperand(0);
11688       if (N00.getOpcode() == PreferredFusedOpcode) {
11689         SDValue N002 = N00.getOperand(2);
11690         if (isContractableFMUL(N002) &&
11691             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11692                                 N00.getValueType())) {
11693           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
11694                                       N002.getOperand(0), N002.getOperand(1),
11695                                       N1, Flags);
11696         }
11697       }
11698     }
11699 
11700     // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
11701     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
11702     if (N1.getOpcode() == PreferredFusedOpcode) {
11703       SDValue N12 = N1.getOperand(2);
11704       if (N12.getOpcode() == ISD::FP_EXTEND) {
11705         SDValue N120 = N12.getOperand(0);
11706         if (isContractableFMUL(N120) &&
11707             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11708                                 N120.getValueType())) {
11709           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
11710                                       N120.getOperand(0), N120.getOperand(1),
11711                                       N0, Flags);
11712         }
11713       }
11714     }
11715 
11716     // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
11717     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
11718     // FIXME: This turns two single-precision and one double-precision
11719     // operation into two double-precision operations, which might not be
11720     // interesting for all targets, especially GPUs.
11721     if (N1.getOpcode() == ISD::FP_EXTEND) {
11722       SDValue N10 = N1.getOperand(0);
11723       if (N10.getOpcode() == PreferredFusedOpcode) {
11724         SDValue N102 = N10.getOperand(2);
11725         if (isContractableFMUL(N102) &&
11726             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11727                                 N10.getValueType())) {
11728           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
11729                                       N102.getOperand(0), N102.getOperand(1),
11730                                       N0, Flags);
11731         }
11732       }
11733     }
11734   }
11735 
11736   return SDValue();
11737 }
11738 
/// Try to perform FMA combining on a given FSUB node.
///
/// Matches (fsub ...) patterns that contain a contractable FMUL — possibly
/// behind FP_EXTEND and/or FNEG — and rewrites them into FMA/FMAD nodes,
/// negating operands as needed to preserve the subtraction semantics.
/// Returns the fused node on success, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  const SDNodeFlags Flags = N->getFlags();
  // Fusion is allowed either globally (unsafe math / fp-contract=fast /
  // target FMAD) or per-node via the 'contract' fast-math flag.
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);

  // If the subtraction is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Defer to the machine combiner when the target prefers forming FMAs there.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };

  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1),
                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
  }

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT,
                                   N1.getOperand(0)),
                       N1.getOperand(1), N0, Flags);
  }

  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
      (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    SDValue N00 = N0.getOperand(0).getOperand(0);
    SDValue N01 = N0.getOperand(0).getOperand(1);
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fsub (fpext (fmul x, y)), z)
  //   -> (fma (fpext x), (fpext y), (fneg z))
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                            N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)),
                         DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
    }
  }

  // fold (fsub x, (fpext (fmul y, z)))
  //   -> (fma (fneg (fpext y)), (fpext z), x)
  // Note: Commutes FSUB operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                            N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                 N10.getOperand(0))),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)),
                         N0, Flags);
    }
  }

  // fold (fsub (fpext (fneg (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FNEG) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                              N00.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1, Flags));
      }
    }
  }

  // fold (fsub (fneg (fpext (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FNEG) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FP_EXTEND) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                              N000.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1, Flags));
      }
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
    if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
        N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     DAG.getNode(ISD::FNEG, SL, VT,
                                                 N1), Flags), Flags);
    }

    // fold (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N1.getOperand(2)) &&
        N1->hasOneUse()) {
      SDValue N20 = N1.getOperand(2).getOperand(0);
      SDValue N21 = N1.getOperand(2).getOperand(1);
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FNEG, SL, VT, N20),
                                     N21, N0, Flags), Flags);
    }


    // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
    if (N0.getOpcode() == PreferredFusedOpcode &&
        N0->hasOneUse()) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N020.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             N0.getOperand(0), N0.getOperand(1),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1), Flags), Flags);
        }
      }
    }

    // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y),
    //           (fma (fpext u), (fpext v), (fneg z)))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N00.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(1)),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1), Flags), Flags);
        }
      }
    }

    // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
    //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
        N1->hasOneUse()) {
      SDValue N120 = N1.getOperand(2).getOperand(0);
      if (isContractableFMUL(N120) &&
          TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                              N120.getValueType())) {
        SDValue N1200 = N120.getOperand(0);
        SDValue N1201 = N120.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
                           N1.getOperand(1),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1200)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1201),
                                       N0, Flags), Flags);
      }
    }

    // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
    //   -> (fma (fneg (fpext y)), (fpext z),
    //           (fma (fneg (fpext u)), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND &&
        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
      SDValue CvtSrc = N1.getOperand(0);
      SDValue N100 = CvtSrc.getOperand(0);
      SDValue N101 = CvtSrc.getOperand(1);
      SDValue N102 = CvtSrc.getOperand(2);
      if (isContractableFMUL(N102) &&
          TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                              CvtSrc.getValueType())) {
        SDValue N1020 = N102.getOperand(0);
        SDValue N1021 = N102.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N100)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1020)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1021),
                                       N0, Flags), Flags);
      }
    }
  }

  return SDValue();
}
12038 
12039 /// Try to perform FMA combining on a given FMUL node based on the distributive
12040 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
12041 /// subtraction instead of addition).
12042 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
12043   SDValue N0 = N->getOperand(0);
12044   SDValue N1 = N->getOperand(1);
12045   EVT VT = N->getValueType(0);
12046   SDLoc SL(N);
12047   const SDNodeFlags Flags = N->getFlags();
12048 
12049   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
12050 
12051   const TargetOptions &Options = DAG.getTarget().Options;
12052 
12053   // The transforms below are incorrect when x == 0 and y == inf, because the
12054   // intermediate multiplication produces a nan.
12055   if (!Options.NoInfsFPMath)
12056     return SDValue();
12057 
12058   // Floating-point multiply-add without intermediate rounding.
12059   bool HasFMA =
12060       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
12061       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12062       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12063 
12064   // Floating-point multiply-add with intermediate rounding. This can result
12065   // in a less precise result due to the changed rounding order.
12066   bool HasFMAD = Options.UnsafeFPMath &&
12067                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
12068 
12069   // No valid opcode, do not combine.
12070   if (!HasFMAD && !HasFMA)
12071     return SDValue();
12072 
12073   // Always prefer FMAD to FMA for precision.
12074   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12075   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12076 
12077   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
12078   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
12079   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
12080     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
12081       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
12082         if (C->isExactlyValue(+1.0))
12083           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12084                              Y, Flags);
12085         if (C->isExactlyValue(-1.0))
12086           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12087                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12088       }
12089     }
12090     return SDValue();
12091   };
12092 
12093   if (SDValue FMA = FuseFADD(N0, N1, Flags))
12094     return FMA;
12095   if (SDValue FMA = FuseFADD(N1, N0, Flags))
12096     return FMA;
12097 
12098   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
12099   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
12100   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
12101   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
12102   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
12103     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
12104       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
12105         if (C0->isExactlyValue(+1.0))
12106           return DAG.getNode(PreferredFusedOpcode, SL, VT,
12107                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
12108                              Y, Flags);
12109         if (C0->isExactlyValue(-1.0))
12110           return DAG.getNode(PreferredFusedOpcode, SL, VT,
12111                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
12112                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12113       }
12114       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
12115         if (C1->isExactlyValue(+1.0))
12116           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12117                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12118         if (C1->isExactlyValue(-1.0))
12119           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12120                              Y, Flags);
12121       }
12122     }
12123     return SDValue();
12124   };
12125 
12126   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
12127     return FMA;
12128   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
12129     return FMA;
12130 
12131   return SDValue();
12132 }
12133 
/// Combine an FADD node: constant folding, identity/negation folds, the
/// reassociation folds allowed under fast-math flags, and finally the
/// FADD -> FMA fusion. Returns the replacement value or an empty SDValue.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);

  // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
  if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
      return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // Only when negating B is strictly cheaper than keeping the FNEG.
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) ==
          TargetLowering::NegatibleCost::Cheaper)
    return DAG.getNode(
        ISD::FSUB, DL, VT, N0,
        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      TLI.getNegatibleCost(N0, DAG, LegalOperations, ForCodeSize) ==
          TargetLowering::NegatibleCost::Cheaper)
    return DAG.getNode(
        ISD::FSUB, DL, VT, N1,
        TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags);

  // Matches a one-use (fmul B, -2.0) (scalar or splat constant).
  auto isFMulNegTwo = [](SDValue FMul) {
    if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
      return false;
    auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
    return C && C->isExactlyValue(-2.0);
  };

  // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N0)) {
    SDValue B = N0.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
    return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
  }
  // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N1)) {
    SDValue B = N1.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
    return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
  }

  // No FP constant should be created after legalization as Instruction
  // Selection pass has a hard time dealing with FP constants.
  bool AllowNewConst = (Level < AfterLegalizeDAG);

  // If nnan is enabled, fold lots of things.
  if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);
  }

  // If 'unsafe math' or reassoc and nsz, fold lots of things.
  // TODO: break out portions of the transformations below for which Unsafe is
  //       considered and which do not require both nsz and reassoc
  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
      AllowNewConst) {
    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
    if (N1CFP && N0.getOpcode() == ISD::FADD &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
    }

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
        }
      }

      if (N0.getOpcode() == ISD::FADD) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N1, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      if (N1.getOpcode() == ISD::FADD) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N0, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
                           DAG.getConstantFP(4.0, DL, VT), Flags);
      }
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if (SDValue Fused = visitFADDForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }
  return SDValue();
}
12314 
/// Try to simplify an FSUB node.
///
/// Folds attempted here, in order (each guarded by the constant checks and/or
/// fast-math flags visible in the code):
///   (fsub c1, c2)       -> c1-c2 (constant folded via getNode)
///   (fsub A, 0)         -> A
///   (fsub x, x)         -> 0.0
///   (fsub -0.0, N1)     -> -N1
///   X - (X + Y)         -> -Y   and   X - (Y + X) -> -Y
///   (fsub A, (fneg B))  -> (fadd A, B)
/// followed by FSUB -> FMA formation.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Scalar FP constants or splat build-vectors (undef lanes allowed).
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);

  // Push the fsub into the arms of a select of constants.
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // (fsub A, 0) -> A
  // Removing "- (-0.0)" needs no-signed-zeros: (fsub -0.0, -0.0) is +0.0,
  // while this fold would return -0.0.
  if (N1CFP && N1CFP->isZero()) {
    if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
        Flags.hasNoSignedZeros()) {
      return N0;
    }
  }

  if (N0 == N1) {
    // (fsub x, x) -> 0.0
    // Only valid when x cannot be NaN (NaN - NaN is NaN, not 0.0).
    if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
      return DAG.getConstantFP(0.0f, DL, VT);
  }

  // (fsub -0.0, N1) -> -N1
  // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
  //       FSUB does not specify the sign bit of a NaN. Also note that for
  //       the same reason, the inverse transform is not safe, unless fast math
  //       flags are in play.
  if (N0CFP && N0CFP->isZero()) {
    if (N0CFP->isNegative() ||
        (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
      // Prefer a cheaply-negatable form of N1; otherwise fall back to an
      // explicit FNEG if that operation is legal (or we are pre-legalization).
      if (TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) !=
          TargetLowering::NegatibleCost::Expensive)
        return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
    }
  }

  // Reassociation folds; require (reassoc && nsz) via per-node flags or the
  // equivalent global options.
  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
      N1.getOpcode() == ISD::FADD) {
    // X - (X + Y) -> -Y
    if (N0 == N1->getOperand(0))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
    // X - (Y + X) -> -Y
    if (N0 == N1->getOperand(1))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
  }

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) !=
      TargetLowering::NegatibleCost::Expensive)
    return DAG.getNode(
        ISD::FADD, DL, VT, N0,
        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);

  // FSUB -> FMA combines:
  if (SDValue Fused = visitFSUBForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
12393 
12394 /// Return true if both inputs are at least as cheap in negated form and at
12395 /// least one input is strictly cheaper in negated form.
12396 bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
12397   TargetLowering::NegatibleCost LHSNeg =
12398       TLI.getNegatibleCost(X, DAG, LegalOperations, ForCodeSize);
12399   if (TargetLowering::NegatibleCost::Expensive == LHSNeg)
12400     return false;
12401 
12402   TargetLowering::NegatibleCost RHSNeg =
12403       TLI.getNegatibleCost(Y, DAG, LegalOperations, ForCodeSize);
12404   if (TargetLowering::NegatibleCost::Expensive == RHSNeg)
12405     return false;
12406 
12407   // Both negated operands are at least as cheap as their counterparts.
12408   // Check to see if at least one is cheaper negated.
12409   return (TargetLowering::NegatibleCost::Cheaper == LHSNeg ||
12410           TargetLowering::NegatibleCost::Cheaper == RHSNeg);
12411 }
12412 
/// Try to simplify an FMUL node.
///
/// Folds attempted here include constant folding, canonicalizing a constant
/// operand to the RHS, multiply-by-zero/2.0/-1.0 special cases, reassociation
/// of constant factors, double-negation removal, a select-based fabs/fneg
/// pattern, and FMUL -> FMA distribution.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Scalar FP constants or splat build-vectors (undef lanes allowed).
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // Push the fmul into the arms of a select of constants.
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
      (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
    // fold (fmul A, 0) -> 0
    // Requires nnan (NaN * 0 and Inf * 0 are NaN) and nsz (-x * 0 is -0.0).
    if (N1CFP && N1CFP->isZero())
      return N1;
  }

  // Reassociation folds below are only valid under unsafe math or the
  // per-node reassoc flag, since they change intermediate rounding.
  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
    if (isConstantFPBuildVectorOrConstantFP(N1) &&
        N0.getOpcode() == ISD::FMUL) {
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      // Avoid an infinite loop by making sure that N00 is not a constant
      // (the inner multiply has not been constant folded yet).
      if (isConstantFPBuildVectorOrConstantFP(N01) &&
          !isConstantFPBuildVectorOrConstantFP(N00)) {
        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
        return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
      }
    }

    // Match a special-case: we convert X * 2.0 into fadd.
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
        N0.getOperand(0) == N0.getOperand(1)) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // -N0 * -N1 --> N0 * N1
  // Only done when both negated forms are no worse and at least one is
  // strictly cheaper (see isCheaperToUseNegatedFPOps).
  if (isCheaperToUseNegatedFPOps(N0, N1)) {
    SDValue NegN0 =
        TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
    SDValue NegN1 =
        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
    return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
  }

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    // Normalize so that Select holds the select node and X the other operand.
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    // The pattern only applies to (select (setcc X, 0.0, cc), c1, c2).
    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      // For less-than conditions, swap the select arms so the checks below
      // can treat everything as a greater-than condition.
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        std::swap(TrueOpnd, FalseOpnd);
        LLVM_FALLTHROUGH;
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                   DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
12546 
/// Try to simplify an FMA node.
///
/// Handles constant folding, double-negation removal, identity folds with
/// 0.0/1.0/-1.0 multiplicands, canonicalization of a constant multiplicand to
/// operand 1, contraction-guarded reassociation with a constant, and folding
/// the whole FMA into an FNEG when that is cheaper.
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  // Scalar constant multiplicands only (not build-vectors) for these locals.
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // FMA nodes have flags that propagate to the created nodes.
  const SDNodeFlags Flags = N->getFlags();
  // Algebraic folds below are allowed either globally (UnsafeFPMath) or when
  // this particular node is contractable.
  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);

  // Constant fold FMA.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  // (-N0 * -N1) + N2 --> (N0 * N1) + N2
  if (isCheaperToUseNegatedFPOps(N0, N1)) {
    SDValue NegN0 =
        TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
    SDValue NegN1 =
        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
    return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
  }

  if (UnsafeFPMath) {
    // (fma 0, y, z) -> z and (fma x, 0, z) -> z.
    // Not IEEE-safe (e.g. 0 * Inf is NaN), hence the unsafe-math guard.
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // (fma 1.0, y, z) -> (fadd y, z) and (fma x, 1.0, z) -> (fadd x, z).
  // TODO: The FMA node should have flags that propagate to these nodes.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  if (UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
                                     Flags), Flags);
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    if (N0.getOpcode() == ISD::FMUL &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      return DAG.getNode(ISD::FMA, DL, VT,
                         N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
                                     Flags),
                         N2);
    }
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }

    // fma (fneg x), K, y -> fma x, -K, y
    // Guarded so we only negate the constant when ConstantFP is legal, or
    // when K is single-use and not a legal FP immediate anyway.
    if (N0.getOpcode() == ISD::FNEG &&
        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
                                              ForCodeSize)))) {
      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
    }
  }

  if (UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(1.0, DL, VT), Flags),
                         Flags);
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
                         Flags);
    }
  }

  // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
  // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
  if (!TLI.isFNegFree(VT) &&
      TLI.getNegatibleCost(SDValue(N, 0), DAG, LegalOperations, ForCodeSize) ==
          TargetLowering::NegatibleCost::Cheaper)
    return DAG.getNode(ISD::FNEG, DL, VT,
                       TLI.getNegatedExpression(SDValue(N, 0), DAG,
                                                LegalOperations, ForCodeSize),
                       Flags);
  return SDValue();
}
12670 
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // TODO: Limit this transform based on optsize/minsize - it always creates at
  //       least 1 extra instruction. But the perf win may be substantial enough
  //       that only minsize should restrict this.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags Flags = N->getFlags();
  // Reciprocal formation changes rounding, so it requires either global
  // unsafe math or this node's allow-reciprocal flag.
  if (!UnsafeMath && !Flags.hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal/fneg-reciprocal.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
  if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  SDValue N1 = N->getOperand(1);
  unsigned MinUses = TLI.combineRepeatedFPDivisors();

  // For splat vectors, scale the number of uses by the splat factor. If we can
  // convert the division into a scalar op, that will likely be much faster.
  unsigned NumElts = 1;
  EVT VT = N->getValueType(0);
  if (VT.isVector() && DAG.isSplatValue(N1))
    NumElts = VT.getVectorNumElements();

  // A MinUses of zero disables the transform entirely.
  if (!MinUses || (N1->use_size() * NumElts) < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if ((Users.size() * NumElts) < MinUses)
    return SDValue();

  // Build a single shared reciprocal (1.0 / Divisor) that every eligible
  // user will be rewritten to multiply by.
  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  return SDValue(N, 0);  // N was replaced.
}
12744 
/// Try to simplify an FDIV node.
///
/// Attempts constant folding, repeated-divisor sharing, reciprocal-constant
/// multiplication, target square-root/reciprocal estimate formation, and
/// double-negation removal.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Scalar constants only (not splat build-vectors) for these locals.
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  // Push the fdiv into the arms of a select of constants.
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Share one reciprocal among multiple divisions by the same divisor.
  if (SDValue V = combineRepeatedFPDivisors(N))
    return V;

  // The folds below change rounding behavior, so they require either global
  // unsafe math or this node's allow-reciprocal flag.
  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // x / sqrt(y) -> x * rsqrt(y)
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Same fold, looking through an fp_extend of the sqrt.
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Same fold, looking through an fp_round of the sqrt.
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
      return RV;
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (isCheaperToUseNegatedFPOps(N0, N1))
    return DAG.getNode(
        ISD::FDIV, SDLoc(N), VT,
        TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize),
        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);

  return SDValue();
}
12848 
12849 SDValue DAGCombiner::visitFREM(SDNode *N) {
12850   SDValue N0 = N->getOperand(0);
12851   SDValue N1 = N->getOperand(1);
12852   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12853   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12854   EVT VT = N->getValueType(0);
12855 
12856   // fold (frem c1, c2) -> fmod(c1,c2)
12857   if (N0CFP && N1CFP)
12858     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
12859 
12860   if (SDValue NewSel = foldBinOpIntoSelect(N))
12861     return NewSel;
12862 
12863   return SDValue();
12864 }
12865 
12866 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
12867   SDNodeFlags Flags = N->getFlags();
12868   if (!DAG.getTarget().Options.UnsafeFPMath &&
12869       !Flags.hasApproximateFuncs())
12870     return SDValue();
12871 
12872   SDValue N0 = N->getOperand(0);
12873   if (TLI.isFsqrtCheap(N0, DAG))
12874     return SDValue();
12875 
12876   // FSQRT nodes have flags that propagate to the created nodes.
12877   return buildSqrtEstimate(N0, Flags);
12878 }
12879 
12880 /// copysign(x, fp_extend(y)) -> copysign(x, y)
12881 /// copysign(x, fp_round(y)) -> copysign(x, y)
12882 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
12883   SDValue N1 = N->getOperand(1);
12884   if ((N1.getOpcode() == ISD::FP_EXTEND ||
12885        N1.getOpcode() == ISD::FP_ROUND)) {
12886     // Do not optimize out type conversion of f128 type yet.
12887     // For some targets like x86_64, configuration is changed to keep one f128
12888     // value in one SSE register, but instruction selection cannot handle
12889     // FCOPYSIGN on SSE registers yet.
12890     EVT N1VT = N1->getValueType(0);
12891     EVT N1Op0VT = N1->getOperand(0).getValueType();
12892     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
12893   }
12894   return false;
12895 }
12896 
/// Try to simplify an FCOPYSIGN node by constant folding, by replacing a
/// constant-sign copysign with fabs/fneg, or by stripping operations on
/// either operand that cannot change the result.
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);

  if (N0CFP && N1CFP) // Constant fold
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);

  // If the sign operand is a (splat) constant, the result sign is known.
  if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
    const APFloat &V = N1C->getValueAPF();
    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
    if (!V.isNegative()) {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
        return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
    } else {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                           DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
    }
  }

  // The sign bit of the magnitude operand is overwritten, so strip any
  // N0 operation that only affects that sign bit:
  // copysign(fabs(x), y) -> copysign(x, y)
  // copysign(fneg(x), y) -> copysign(x, y)
  // copysign(copysign(x,z), y) -> copysign(x, y)
  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
      N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);

  // copysign(x, abs(y)) -> abs(x)
  // The copied sign is known non-negative.
  if (N1.getOpcode() == ISD::FABS)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // copysign(x, copysign(y,z)) -> copysign(x, z)
  // Only z's sign reaches the result.
  if (N1.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));

  return SDValue();
}
12943 
/// Try to convert pow(X, C) with a special constant exponent into cheaper
/// operations: C == 1/3 -> cbrt(X); C == 0.25 -> sqrt(sqrt(X));
/// C == 0.75 -> sqrt(X) * sqrt(sqrt(X)). All cases are guarded by the
/// fast-math flags documented inline.
SDValue DAGCombiner::visitFPOW(SDNode *N) {
  // Only a (splat) constant exponent is interesting here.
  ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
  if (!ExponentC)
    return SDValue();

  // Try to convert x ** (1/3) into cube root.
  // TODO: Handle the various flavors of long double.
  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
  //       Some range near 1/3 should be fine.
  EVT VT = N->getValueType(0);
  if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
      (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
    // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
    // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf nnan afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();
    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Do not create a cbrt() libcall if the target does not have it, and do not
    // turn a pow that has lowering support into a cbrt() libcall.
    if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
        (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
         DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
      return SDValue();

    return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
  }

  // Try to convert x ** (1/4) and x ** (3/4) into square roots.
  // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
  // TODO: This could be extended (using a target hook) to handle smaller
  // power-of-2 fractional exponents.
  bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
  bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
  if (ExponentIs025 || ExponentIs075) {
    // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
    // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
    // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
    // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();

    // We only need no signed zeros for the 0.25 case.
    if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Don't double the number of libcalls. We are trying to inline fast code.
    if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
      return SDValue();

    // Assume that libcalls are the smallest code.
    // TODO: This restriction should probably be lifted for vectors.
    if (ForCodeSize)
      return SDValue();

    // pow(X, 0.25) --> sqrt(sqrt(X))
    SDLoc DL(N);
    SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
    SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
    if (ExponentIs025)
      return SqrtSqrt;
    // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
    return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
  }

  return SDValue();
}
13019 
13020 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
13021                                const TargetLowering &TLI) {
13022   // This optimization is guarded by a function attribute because it may produce
13023   // unexpected results. Ie, programs may be relying on the platform-specific
13024   // undefined behavior when the float-to-int conversion overflows.
13025   const Function &F = DAG.getMachineFunction().getFunction();
13026   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
13027   if (StrictOverflow.getValueAsString().equals("false"))
13028     return SDValue();
13029 
13030   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
13031   // replacing casts with a libcall. We also must be allowed to ignore -0.0
13032   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
13033   // conversions would return +0.0.
13034   // FIXME: We should be able to use node-level FMF here.
13035   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
13036   EVT VT = N->getValueType(0);
13037   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
13038       !DAG.getTarget().Options.NoSignedZerosFPMath)
13039     return SDValue();
13040 
13041   // fptosi/fptoui round towards zero, so converting from FP to integer and
13042   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
13043   SDValue N0 = N->getOperand(0);
13044   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
13045       N0.getOperand(0).getValueType() == VT)
13046     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
13047 
13048   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
13049       N0.getOperand(0).getValueType() == VT)
13050     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
13051 
13052   return SDValue();
13053 }
13054 
/// Combine an ISD::SINT_TO_FP node: constant-fold, convert to UINT_TO_FP when
/// the sign bit is known zero, turn boolean sources into SELECT_CC of FP
/// constants, and eliminate redundant int->fp->int round trips.
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // [us]itofp(undef) = 0, because the result value is bounded.
  if (N0.isUndef())
    return DAG.getConstantFP(0.0, SDLoc(N), VT);

  // fold (sint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
      hasOperation(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
    // An i1 'true' is all-ones, so its signed FP value is -1.0 -- hence the
    // MVT::i1 guard and the -1.0 constant below.
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0,, cc)
    // Here the boolean is zero-extended first, so 'true' becomes +1 -> 1.0.
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  // sitofp (fptosi X) --> ftrunc X, when the guarding function attribute
  // permits it (see foldFPToIntToFP).
  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}
13115 
/// Combine an ISD::UINT_TO_FP node; mirror image of visitSINT_TO_FP.
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // [us]itofp(undef) = 0, because the result value is bounded.
  if (N0.isUndef())
    return DAG.getConstantFP(0.0, SDLoc(N), VT);

  // fold (uint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
      hasOperation(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0,, cc)
    // (unsigned 'true' converts to 1.0, matching the constants built below).
    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  // uitofp (fptoui X) --> ftrunc X, when the guarding function attribute
  // permits it (see foldFPToIntToFP).
  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}
13161 
// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  // A signed type spends one bit on the sign, so the number of magnitude bits
  // is one less than the bit width.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range.
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    // Same scalar width: the round trip is an integer identity (the bitcast
    // handles any int-vs-int type distinction).
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}
13203 
13204 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
13205   SDValue N0 = N->getOperand(0);
13206   EVT VT = N->getValueType(0);
13207 
13208   // fold (fp_to_sint undef) -> undef
13209   if (N0.isUndef())
13210     return DAG.getUNDEF(VT);
13211 
13212   // fold (fp_to_sint c1fp) -> c1
13213   if (isConstantFPBuildVectorOrConstantFP(N0))
13214     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
13215 
13216   return FoldIntToFPToInt(N, DAG);
13217 }
13218 
13219 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
13220   SDValue N0 = N->getOperand(0);
13221   EVT VT = N->getValueType(0);
13222 
13223   // fold (fp_to_uint undef) -> undef
13224   if (N0.isUndef())
13225     return DAG.getUNDEF(VT);
13226 
13227   // fold (fp_to_uint c1fp) -> c1
13228   if (isConstantFPBuildVectorOrConstantFP(N0))
13229     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
13230 
13231   return FoldIntToFPToInt(N, DAG);
13232 }
13233 
/// Combine an ISD::FP_ROUND node. Operand 1 is an integer flag; per the code
/// below, 1 appears to mark the round as value-preserving ("trunc").
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Only the magnitude operand is rounded; the sign source Y is unchanged.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    // Queue the new round so it gets a chance to combine further.
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
13289 
/// Combine an ISD::FP_EXTEND node.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    // Depending on how the extended type compares with X's type, the pair
    // collapses to X itself, a narrower round, or a smaller extend.
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    // Replace N with the extending load, then redirect the original load's
    // other users to a rounded copy of the extended value, forwarding the
    // new load's chain result.
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
13342 
13343 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
13344   SDValue N0 = N->getOperand(0);
13345   EVT VT = N->getValueType(0);
13346 
13347   // fold (fceil c1) -> fceil(c1)
13348   if (isConstantFPBuildVectorOrConstantFP(N0))
13349     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
13350 
13351   return SDValue();
13352 }
13353 
13354 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
13355   SDValue N0 = N->getOperand(0);
13356   EVT VT = N->getValueType(0);
13357 
13358   // fold (ftrunc c1) -> ftrunc(c1)
13359   if (isConstantFPBuildVectorOrConstantFP(N0))
13360     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
13361 
13362   // fold ftrunc (known rounded int x) -> x
13363   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
13364   // likely to be generated to extract integer from a rounded floating value.
13365   switch (N0.getOpcode()) {
13366   default: break;
13367   case ISD::FRINT:
13368   case ISD::FTRUNC:
13369   case ISD::FNEARBYINT:
13370   case ISD::FFLOOR:
13371   case ISD::FCEIL:
13372     return N0;
13373   }
13374 
13375   return SDValue();
13376 }
13377 
13378 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
13379   SDValue N0 = N->getOperand(0);
13380   EVT VT = N->getValueType(0);
13381 
13382   // fold (ffloor c1) -> ffloor(c1)
13383   if (isConstantFPBuildVectorOrConstantFP(N0))
13384     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
13385 
13386   return SDValue();
13387 }
13388 
// FIXME: FNEG and FABS have a lot in common; refactor.
/// Combine an ISD::FNEG node.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the operand can be negated at acceptable cost, emit the pre-negated
  // expression instead of an explicit FNEG.
  if (TLI.getNegatibleCost(N0, DAG, LegalOperations, ForCodeSize) !=
      TargetLowering::NegatibleCost::Expensive)
    return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);

  // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
  // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
  // know it was called from a context with a nsz flag if the input fsub does
  // not.
  if (N0.getOpcode() == ISD::FSUB &&
      (DAG.getTarget().Options.NoSignedZerosFPMath ||
       N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
                       N0.getOperand(0), N->getFlags());
  }

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
      }
      // XOR-ing the sign bit(s) into the integer form flips the FP sign(s).
      SDLoc DL0(N0);
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only when the DAG is legal (LegalDAG) and the negated immediate, or
      // FP constants in general, are legal for this type.
      if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
                       TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
            N0->getFlags());
    }
  }

  return SDValue();
}
13457 
13458 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
13459                             APFloat (*Op)(const APFloat &, const APFloat &)) {
13460   SDValue N0 = N->getOperand(0);
13461   SDValue N1 = N->getOperand(1);
13462   EVT VT = N->getValueType(0);
13463   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
13464   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
13465 
13466   if (N0CFP && N1CFP) {
13467     const APFloat &C0 = N0CFP->getValueAPF();
13468     const APFloat &C1 = N1CFP->getValueAPF();
13469     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
13470   }
13471 
13472   // Canonicalize to constant on RHS.
13473   if (isConstantFPBuildVectorOrConstantFP(N0) &&
13474       !isConstantFPBuildVectorOrConstantFP(N1))
13475     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
13476 
13477   return SDValue();
13478 }
13479 
/// Combine FMINNUM by delegating to the shared min/max helper with the
/// APFloat `minnum` folder.
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
  return visitFMinMax(DAG, N, minnum);
}
13483 
/// Combine FMAXNUM by delegating to the shared min/max helper with the
/// APFloat `maxnum` folder.
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
  return visitFMinMax(DAG, N, maxnum);
}
13487 
/// Combine FMINIMUM by delegating to the shared min/max helper with the
/// APFloat `minimum` folder.
SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
  return visitFMinMax(DAG, N, minimum);
}
13491 
/// Combine FMAXIMUM by delegating to the shared min/max helper with the
/// APFloat `maximum` folder.
SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
  return visitFMinMax(DAG, N, maximum);
}
13495 
/// Combine an ISD::FABS node.
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fabs c1) -> fabs(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  // In both cases the inner node only changes the sign, which fabs discards.
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
  if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
      }
      // AND-ing away the sign bit(s) clears the FP sign(s).
      SDLoc DL(N0);
      Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
                        DAG.getConstant(SignMask, DL, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(N->getValueType(0), Int);
    }
  }

  return SDValue();
}
13538 
/// Combine an ISD::BRCOND node (operands: chain, boolean condition, dest).
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  // Otherwise, try to rebuild the condition as an explicit SETCC.
  if (N1.hasOneUse()) {
    // rebuildSetCC calls visitXor which may change the Chain when there is a
    // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
    HandleSDNode ChainHandle(Chain);
    if (SDValue NewN1 = rebuildSetCC(N1))
      return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
                         ChainHandle.getValue(), NewN1, N2);
  }

  return SDValue();
}
13571 
13572 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
13573   if (N.getOpcode() == ISD::SRL ||
13574       (N.getOpcode() == ISD::TRUNCATE &&
13575        (N.getOperand(0).hasOneUse() &&
13576         N.getOperand(0).getOpcode() == ISD::SRL))) {
13577     // Look pass the truncate.
13578     if (N.getOpcode() == ISD::TRUNCATE)
13579       N = N.getOperand(0);
13580 
13581     // Match this pattern so that we can generate simpler code:
13582     //
13583     //   %a = ...
13584     //   %b = and i32 %a, 2
13585     //   %c = srl i32 %b, 1
13586     //   brcond i32 %c ...
13587     //
13588     // into
13589     //
13590     //   %a = ...
13591     //   %b = and i32 %a, 2
13592     //   %c = setcc eq %b, 0
13593     //   brcond %c ...
13594     //
13595     // This applies only when the AND constant value has one bit set and the
13596     // SRL constant is equal to the log2 of the AND constant. The back-end is
13597     // smart enough to convert the result into a TEST/JMP sequence.
13598     SDValue Op0 = N.getOperand(0);
13599     SDValue Op1 = N.getOperand(1);
13600 
13601     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
13602       SDValue AndOp1 = Op0.getOperand(1);
13603 
13604       if (AndOp1.getOpcode() == ISD::Constant) {
13605         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
13606 
13607         if (AndConst.isPowerOf2() &&
13608             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
13609           SDLoc DL(N);
13610           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
13611                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
13612                               ISD::SETNE);
13613         }
13614       }
13615     }
13616   }
13617 
13618   // Transform br(xor(x, y)) -> br(x != y)
13619   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
13620   if (N.getOpcode() == ISD::XOR) {
13621     // Because we may call this on a speculatively constructed
13622     // SimplifiedSetCC Node, we need to simplify this node first.
13623     // Ideally this should be folded into SimplifySetCC and not
13624     // here. For now, grab a handle to N so we don't lose it from
13625     // replacements interal to the visit.
13626     HandleSDNode XORHandle(N);
13627     while (N.getOpcode() == ISD::XOR) {
13628       SDValue Tmp = visitXOR(N.getNode());
13629       // No simplification done.
13630       if (!Tmp.getNode())
13631         break;
13632       // Returning N is form in-visit replacement that may invalidated
13633       // N. Grab value from Handle.
13634       if (Tmp.getNode() == N.getNode())
13635         N = XORHandle.getValue();
13636       else // Node simplified. Try simplifying again.
13637         N = Tmp;
13638     }
13639 
13640     if (N.getOpcode() != ISD::XOR)
13641       return N;
13642 
13643     SDNode *TheXor = N.getNode();
13644 
13645     SDValue Op0 = TheXor->getOperand(0);
13646     SDValue Op1 = TheXor->getOperand(1);
13647 
13648     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
13649       bool Equal = false;
13650       if (isOneConstant(Op0) && Op0.hasOneUse() &&
13651           Op0.getOpcode() == ISD::XOR) {
13652         TheXor = Op0.getNode();
13653         Equal = true;
13654       }
13655 
13656       EVT SetCCVT = N.getValueType();
13657       if (LegalTypes)
13658         SetCCVT = getSetCCResultType(SetCCVT);
13659       // Replace the uses of XOR with SETCC
13660       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
13661                           Equal ? ISD::SETEQ : ISD::SETNE);
13662     }
13663   }
13664 
13665   return SDValue();
13666 }
13667 
13668 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
13669 //
13670 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
13671   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
13672   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
13673 
13674   // If N is a constant we could fold this into a fallthrough or unconditional
13675   // branch. However that doesn't happen very often in normal code, because
13676   // Instcombine/SimplifyCFG should have handled the available opportunities.
13677   // If we did this folding here, it would be necessary to update the
13678   // MachineBasicBlock CFG, which is awkward.
13679 
13680   // Use SimplifySetCC to simplify SETCC's.
13681   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
13682                                CondLHS, CondRHS, CC->get(), SDLoc(N),
13683                                false);
13684   if (Simp.getNode()) AddToWorklist(Simp.getNode());
13685 
13686   // fold to a simpler setcc
13687   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
13688     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
13689                        N->getOperand(0), Simp.getOperand(2),
13690                        Simp.getOperand(0), Simp.getOperand(1),
13691                        N->getOperand(4));
13692 
13693   return SDValue();
13694 }
13695 
13696 /// Return true if 'Use' is a load or a store that uses N as its base pointer
13697 /// and that N may be folded in the load / store addressing mode.
13698 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
13699                                     SelectionDAG &DAG,
13700                                     const TargetLowering &TLI) {
13701   EVT VT;
13702   unsigned AS;
13703 
13704   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
13705     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
13706       return false;
13707     VT = LD->getMemoryVT();
13708     AS = LD->getAddressSpace();
13709   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
13710     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
13711       return false;
13712     VT = ST->getMemoryVT();
13713     AS = ST->getAddressSpace();
13714   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
13715     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
13716       return false;
13717     VT = LD->getMemoryVT();
13718     AS = LD->getAddressSpace();
13719   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
13720     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
13721       return false;
13722     VT = ST->getMemoryVT();
13723     AS = ST->getAddressSpace();
13724   } else
13725     return false;
13726 
13727   TargetLowering::AddrMode AM;
13728   if (N->getOpcode() == ISD::ADD) {
13729     AM.HasBaseReg = true;
13730     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13731     if (Offset)
13732       // [reg +/- imm]
13733       AM.BaseOffs = Offset->getSExtValue();
13734     else
13735       // [reg +/- reg]
13736       AM.Scale = 1;
13737   } else if (N->getOpcode() == ISD::SUB) {
13738     AM.HasBaseReg = true;
13739     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13740     if (Offset)
13741       // [reg +/- imm]
13742       AM.BaseOffs = -Offset->getSExtValue();
13743     else
13744       // [reg +/- reg]
13745       AM.Scale = 1;
13746   } else
13747     return false;
13748 
13749   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
13750                                    VT.getTypeForEVT(*DAG.getContext()), AS);
13751 }
13752 
13753 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
13754                                      bool &IsLoad, bool &IsMasked, SDValue &Ptr,
13755                                      const TargetLowering &TLI) {
13756   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
13757     if (LD->isIndexed())
13758       return false;
13759     EVT VT = LD->getMemoryVT();
13760     if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
13761       return false;
13762     Ptr = LD->getBasePtr();
13763   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
13764     if (ST->isIndexed())
13765       return false;
13766     EVT VT = ST->getMemoryVT();
13767     if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
13768       return false;
13769     Ptr = ST->getBasePtr();
13770     IsLoad = false;
13771   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
13772     if (LD->isIndexed())
13773       return false;
13774     EVT VT = LD->getMemoryVT();
13775     if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
13776         !TLI.isIndexedMaskedLoadLegal(Dec, VT))
13777       return false;
13778     Ptr = LD->getBasePtr();
13779     IsMasked = true;
13780   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
13781     if (ST->isIndexed())
13782       return false;
13783     EVT VT = ST->getMemoryVT();
13784     if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
13785         !TLI.isIndexedMaskedStoreLegal(Dec, VT))
13786       return false;
13787     Ptr = ST->getBasePtr();
13788     IsLoad = false;
13789     IsMasked = true;
13790   } else {
13791     return false;
13792   }
13793   return true;
13794 }
13795 
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Indexed nodes are only introduced once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Classify N (load vs. store, masked vs. normal), check that the target
  // supports a pre-indexed form for its memory VT, and grab its base pointer.
  bool IsLoad = true;
  bool IsMasked = false;
  SDValue Ptr;
  if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
                                Ptr, TLI))
    return false;

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!IsLoad) {
    SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
                           : cast<StoreSDNode>(N)->getValue();

    // Would require a copy.
    if (Val == BasePtr)
      return false;

    // Would create a cycle.
    if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.  The same caches are reused (and keep
  // accumulating) for the OtherUses scan below and for check #3.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // A use that feeds back into N cannot be rewritten in terms of the
      // indexed result without creating a cycle.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      // Only constant add/sub users of the matching offset type can be
      // re-expressed; any other user forces us to keep the old base pointer,
      // so the whole rewrite is abandoned.
      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Restore the target's (BasePtr, Offset) orientation before building the
  // indexed node; the swapped orientation is re-established later for the
  // OtherUses rewriting.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (!IsMasked) {
    if (IsLoad)
      Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
    else
      Result =
          DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
  } else {
    if (IsLoad)
      Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
                                        Offset, AM);
    else
      Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
                                         Offset, AM);
  }
  ++PreIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // An indexed load produces (value, updated base, chain); an indexed store
  // produces (updated base, chain) -- hence the result-number mapping below.
  if (IsLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Re-establish the swapped orientation that was in effect when OtherUses
  // was collected, so Offset is the constant again.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}
14019 
/// Try to combine a load/store with an add/sub of its base pointer node into a
/// post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all uses of the
/// add/subtract are redirected to the new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Indexed nodes are only introduced once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Classify N (load vs. store, masked vs. normal), check that the target
  // supports a post-indexed form for its memory VT, and grab its base pointer.
  bool IsLoad = true;
  bool IsMasked = false;
  SDValue Ptr;
  if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked,
                                Ptr, TLI))
    return false;

  // The base pointer needs at least one use besides N (the add/sub we would
  // fold) for the transformation to be possible at all.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Consider each add/sub user of the base pointer as a candidate increment.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      // A frame index or physical register base cannot be post-incremented
      // in place.
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2.
      SmallPtrSet<const SDNode *, 32> Visited;
      SmallVector<const SDNode *, 8> Worklist;
      // Ptr is predecessor to both N and Op.
      Visited.insert(Ptr.getNode());
      Worklist.push_back(N);
      Worklist.push_back(Op);
      if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
          !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
        SDValue Result;
        if (!IsMasked)
          Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
                                               Offset, AM)
                          : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
                                                BasePtr, Offset, AM);
        else
          Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
                                                     BasePtr, Offset, AM)
                          : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
                                                      BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
                   dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // An indexed load produces (value, updated base, chain); an indexed
        // store produces (updated base, chain) -- hence the mapping below.
        if (IsLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(IsLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
14133 
14134 /// Return the base-pointer arithmetic from an indexed \p LD.
14135 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
14136   ISD::MemIndexedMode AM = LD->getAddressingMode();
14137   assert(AM != ISD::UNINDEXED);
14138   SDValue BP = LD->getOperand(1);
14139   SDValue Inc = LD->getOperand(2);
14140 
14141   // Some backends use TargetConstants for load offsets, but don't expect
14142   // TargetConstants in general ADD nodes. We can convert these constants into
14143   // regular Constants (if the constant is not opaque).
14144   assert((Inc.getOpcode() != ISD::TargetConstant ||
14145           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
14146          "Cannot split out indexing using opaque target constants");
14147   if (Inc.getOpcode() == ISD::TargetConstant) {
14148     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
14149     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
14150                           ConstInc->getValueType(0));
14151   }
14152 
14153   unsigned Opc =
14154       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
14155   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
14156 }
14157 
14158 static inline int numVectorEltsOrZero(EVT T) {
14159   return T.isVector() ? T.getVectorNumElements() : 0;
14160 }
14161 
/// Compute, in \p Val, the value that store \p ST actually writes to memory:
/// the stored operand narrowed to the store's memory type when the store is
/// truncating.  Returns false when the truncation cannot be modeled here.
bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
  Val = ST->getValue();
  EVT STType = Val.getValueType();
  EVT STMemType = ST->getMemoryVT();
  // Non-truncating store: the operand already is the in-memory value.
  if (STType == STMemType)
    return true;
  // NOTE(review): gives up when the narrower memory type is a legal type
  // rather than materializing the truncation -- confirm intended rationale.
  if (isTypeLegal(STMemType))
    return false; // fail.
  // FP -> FP narrowing, modeled with an FTRUNC when that is legal for the
  // memory type.
  if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
      TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
    Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
    return true;
  }
  // Integer -> integer narrowing (scalar, or vectors with matching element
  // counts): an explicit TRUNCATE node.
  if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
      STType.isInteger() && STMemType.isInteger()) {
    Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
    return true;
  }
  // Same total bit width: a bitcast is enough.
  if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
    Val = DAG.getBitcast(STMemType, Val);
    return true;
  }
  return false; // fail.
}
14186 
14187 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
14188   EVT LDMemType = LD->getMemoryVT();
14189   EVT LDType = LD->getValueType(0);
14190   assert(Val.getValueType() == LDMemType &&
14191          "Attempting to extend value of non-matching type");
14192   if (LDType == LDMemType)
14193     return true;
14194   if (LDMemType.isInteger() && LDType.isInteger()) {
14195     switch (LD->getExtensionType()) {
14196     case ISD::NON_EXTLOAD:
14197       Val = DAG.getBitcast(LDType, Val);
14198       return true;
14199     case ISD::EXTLOAD:
14200       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
14201       return true;
14202     case ISD::SEXTLOAD:
14203       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
14204       return true;
14205     case ISD::ZEXTLOAD:
14206       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
14207       return true;
14208     }
14209   }
14210   return false;
14211 }
14212 
/// Store-to-load forwarding: if the chain operand of \p LD is a simple store
/// whose stored bits cover all bits that \p LD reads, replace the load with a
/// value derived from the stored operand.  Returns the replacement value, or
/// an empty SDValue if forwarding is not possible.
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
  if (OptLevel == CodeGenOpt::None || !LD->isSimple())
    return SDValue();
  SDValue Chain = LD->getOperand(0);
  StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
  // TODO: Relax this restriction for unordered atomics (see D66309)
  if (!ST || !ST->isSimple())
    return SDValue();

  EVT LDType = LD->getValueType(0);
  EVT LDMemType = LD->getMemoryVT();
  EVT STMemType = ST->getMemoryVT();
  EVT STType = ST->getValue().getValueType();

  // The two accesses must be provably relative to the same base, with a
  // known constant byte offset between them.
  BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
  BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
  int64_t Offset;
  if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
    return SDValue();

  // Normalize for Endianness. After this Offset=0 will denote that the least
  // significant bit in the loaded value maps to the least significant bit in
  // the stored value). With Offset=n (for n > 0) the loaded value starts at the
  // n:th least significant byte of the stored value.
  if (DAG.getDataLayout().isBigEndian())
    Offset = ((int64_t)STMemType.getStoreSizeInBits() -
              (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset;

  // Check that the stored value cover all bits that are loaded.
  bool STCoversLD =
      (Offset >= 0) &&
      (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());

  // Commit the replacement.  An indexed load additionally produces the
  // updated pointer (result 1), which is rebuilt here as an explicit ADD/SUB.
  auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
    if (LD->isIndexed()) {
      bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
                    LD->getAddressingMode() == ISD::POST_DEC);
      unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
      SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
                             LD->getOperand(1), LD->getOperand(2));
      SDValue Ops[] = {Val, Idx, Chain};
      return CombineTo(LD, Ops, 3);
    }
    return CombineTo(LD, Val, Chain);
  };

  if (!STCoversLD)
    return SDValue();

  // Memory as copy space (potentially masked).
  if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
    // Simple case: Direct non-truncating forwarding
    if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
      return ReplaceLd(LD, ST->getValue(), Chain);
    // Can we model the truncate and extension with an and mask?
    if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
        !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
      // Mask to size of LDMemType
      auto Mask =
          DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
                                               STMemType.getSizeInBits()),
                          SDLoc(ST), STType);
      auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
      return ReplaceLd(LD, Val, Chain);
    }
  }

  // TODO: Deal with nonzero offset.
  if (LD->getBasePtr().isUndef() || Offset != 0)
    return SDValue();
  // Model necessary truncations / extensions.
  SDValue Val;
  // Truncate Value To Stored Memory Size.
  // do..while(false) lets 'continue' act as an early exit to the shared
  // cleanup below while keeping a single success path.
  do {
    if (!getTruncatedStoreValue(ST, Val))
      continue;
    if (!isTypeLegal(LDMemType))
      continue;
    if (STMemType != LDMemType) {
      // TODO: Support vectors? This requires extract_subvector/bitcast.
      if (!STMemType.isVector() && !LDMemType.isVector() &&
          STMemType.isInteger() && LDMemType.isInteger())
        Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
      else
        continue;
    }
    if (!extendLoadedValueToExtension(LD, Val))
      continue;
    return ReplaceLd(LD, Val, Chain);
  } while (false);

  // On failure, cleanup dead nodes we may have created.
  if (Val->use_empty())
    deleteAndRecombine(Val.getNode());
  return SDValue();
}
14309 
/// Main combine entry point for LOAD nodes: deletes dead loads, forwards a
/// directly-preceding store's value, refines alignment, finds a better
/// (less constrained) chain, and tries indexed and sliced forms.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  // TODO: Allow this for unordered atomics (see D66309)
  if (LD->isSimple()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // The load is dead if the loaded value (result 0) is unused and the
      // updated pointer (result 1) is either unused or can be rebuilt as a
      // plain add/sub.
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  if (auto V = ForwardStoreValueToDirectLoad(LD))
    return V;

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(LD, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
14434 
14435 namespace {
14436 
14437 /// Helper structure used to slice a load in smaller loads.
14438 /// Basically a slice is obtained from the following sequence:
14439 /// Origin = load Ty1, Base
14440 /// Shift = srl Ty1 Origin, CstTy Amount
14441 /// Inst = trunc Shift to Ty2
14442 ///
14443 /// Then, it will be rewritten into:
14444 /// Slice = load SliceTy, Base + SliceOffset
14445 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
14446 ///
14447 /// SliceTy is deduced from the number of bits that are actually used to
14448 /// build Inst.
14449 struct LoadedSlice {
14450   /// Helper structure used to compute the cost of a slice.
14451   struct Cost {
14452     /// Are we optimizing for code size.
14453     bool ForCodeSize = false;
14454 
14455     /// Various cost.
14456     unsigned Loads = 0;
14457     unsigned Truncates = 0;
14458     unsigned CrossRegisterBanksCopies = 0;
14459     unsigned ZExts = 0;
14460     unsigned Shift = 0;
14461 
14462     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
14463 
14464     /// Get the cost of one isolated slice.
14465     Cost(const LoadedSlice &LS, bool ForCodeSize)
14466         : ForCodeSize(ForCodeSize), Loads(1) {
14467       EVT TruncType = LS.Inst->getValueType(0);
14468       EVT LoadedType = LS.getLoadedType();
14469       if (TruncType != LoadedType &&
14470           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
14471         ZExts = 1;
14472     }
14473 
14474     /// Account for slicing gain in the current cost.
14475     /// Slicing provide a few gains like removing a shift or a
14476     /// truncate. This method allows to grow the cost of the original
14477     /// load with the gain from this slice.
14478     void addSliceGain(const LoadedSlice &LS) {
14479       // Each slice saves a truncate.
14480       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
14481       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
14482                               LS.Inst->getValueType(0)))
14483         ++Truncates;
14484       // If there is a shift amount, this slice gets rid of it.
14485       if (LS.Shift)
14486         ++Shift;
14487       // If this slice can merge a cross register bank copy, account for it.
14488       if (LS.canMergeExpensiveCrossRegisterBankCopy())
14489         ++CrossRegisterBanksCopies;
14490     }
14491 
14492     Cost &operator+=(const Cost &RHS) {
14493       Loads += RHS.Loads;
14494       Truncates += RHS.Truncates;
14495       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
14496       ZExts += RHS.ZExts;
14497       Shift += RHS.Shift;
14498       return *this;
14499     }
14500 
14501     bool operator==(const Cost &RHS) const {
14502       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
14503              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
14504              ZExts == RHS.ZExts && Shift == RHS.Shift;
14505     }
14506 
14507     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
14508 
14509     bool operator<(const Cost &RHS) const {
14510       // Assume cross register banks copies are as expensive as loads.
14511       // FIXME: Do we want some more target hooks?
14512       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
14513       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
14514       // Unless we are optimizing for code size, consider the
14515       // expensive operation first.
14516       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
14517         return ExpensiveOpsLHS < ExpensiveOpsRHS;
14518       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
14519              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
14520     }
14521 
14522     bool operator>(const Cost &RHS) const { return RHS < *this; }
14523 
14524     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
14525 
14526     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
14527   };
14528 
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  SDNode *Inst;

  // The original load instruction this slice was carved out of.
  LoadSDNode *Origin;

  // The right shift amount in bits from the original load.
  // Must be a multiple of 8 for the slice to be byte-addressable
  // (see getOffsetFromBase).
  unsigned Shift;

  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  /// Build a slice. All fields default to null/zero so an empty slice
  /// can be created and populated later; such a slice reports itself as
  /// not legal (see isLegal).
  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
14546 
14547   /// Get the bits used in a chunk of bits \p BitWidth large.
14548   /// \return Result is \p BitWidth and has used bits set to 1 and
14549   ///         not used bits set to 0.
14550   APInt getUsedBits() const {
14551     // Reproduce the trunc(lshr) sequence:
14552     // - Start from the truncated value.
14553     // - Zero extend to the desired bit width.
14554     // - Shift left.
14555     assert(Origin && "No original load to compare against.");
14556     unsigned BitWidth = Origin->getValueSizeInBits(0);
14557     assert(Inst && "This slice is not bound to an instruction");
14558     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
14559            "Extracted slice is bigger than the whole type!");
14560     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
14561     UsedBits.setAllBits();
14562     UsedBits = UsedBits.zext(BitWidth);
14563     UsedBits <<= Shift;
14564     return UsedBits;
14565   }
14566 
14567   /// Get the size of the slice to be loaded in bytes.
14568   unsigned getLoadedSize() const {
14569     unsigned SliceSize = getUsedBits().countPopulation();
14570     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
14571     return SliceSize / 8;
14572   }
14573 
14574   /// Get the type that will be loaded for this slice.
14575   /// Note: This may not be the final type for the slice.
14576   EVT getLoadedType() const {
14577     assert(DAG && "Missing context");
14578     LLVMContext &Ctxt = *DAG->getContext();
14579     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
14580   }
14581 
14582   /// Get the alignment of the load used for this slice.
14583   unsigned getAlignment() const {
14584     unsigned Alignment = Origin->getAlignment();
14585     uint64_t Offset = getOffsetFromBase();
14586     if (Offset != 0)
14587       Alignment = MinAlign(Alignment, Alignment + Offset);
14588     return Alignment;
14589   }
14590 
  /// Check if this slice can be rewritten with legal operations.
  /// A slice is legal when its narrowed type, the load of that type, the
  /// address computation, and any required zero-extension are all legal
  /// for the target.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (!Origin->getOffset().isUndef())
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    // A zero-extend is emitted when the slice's loaded type is narrower
    // than the type produced by the original truncate (see loadSlice).
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }
14634 
  /// Get the offset in bytes of this slice in the original chunk of
  /// bits, taking the target's endianness into account.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets the least significant bits live at the highest
    // address, so mirror the offset within the original loaded type.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }
14655 
  /// Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    // The narrowed load reuses the original chain and memory flags, with
    // the pointer info offset to match the slice's position.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getPointerInfo().getWithOffset(Offset),
                     getAlignment(), Origin->getMemOperand()->getFlags());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }
14693 
14694   /// Check if this slice can be merged with an expensive cross register
14695   /// bank copy. E.g.,
14696   /// i = load i32
14697   /// f = bitcast i32 i to float
14698   bool canMergeExpensiveCrossRegisterBankCopy() const {
14699     if (!Inst || !Inst->hasOneUse())
14700       return false;
14701     SDNode *Use = *Inst->use_begin();
14702     if (Use->getOpcode() != ISD::BITCAST)
14703       return false;
14704     assert(DAG && "Missing context");
14705     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
14706     EVT ResVT = Use->getValueType(0);
14707     const TargetRegisterClass *ResRC =
14708         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
14709     const TargetRegisterClass *ArgRC =
14710         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
14711                            Use->getOperand(0)->isDivergent());
14712     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
14713       return false;
14714 
14715     // At this point, we know that we perform a cross-register-bank copy.
14716     // Check if it is expensive.
14717     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
14718     // Assume bitcasts are cheap, unless both register classes do not
14719     // explicitly share a common sub class.
14720     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
14721       return false;
14722 
14723     // Check if it will be merged with the load.
14724     // 1. Check the alignment constraint.
14725     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
14726         ResVT.getTypeForEVT(*DAG->getContext()));
14727 
14728     if (RequiredAlignment > getAlignment())
14729       return false;
14730 
14731     // 2. Check that the load is a legal operation for that type.
14732     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
14733       return false;
14734 
14735     // 3. Check that we do not have a zext in the way.
14736     if (Inst->getValueType(0) != getLoadedType())
14737       return false;
14738 
14739     return true;
14740   }
14741 };
14742 
14743 } // end anonymous namespace
14744 
14745 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
14746 /// \p UsedBits looks like 0..0 1..1 0..0.
14747 static bool areUsedBitsDense(const APInt &UsedBits) {
14748   // If all the bits are one, this is dense!
14749   if (UsedBits.isAllOnesValue())
14750     return true;
14751 
14752   // Get rid of the unused bits on the right.
14753   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
14754   // Get rid of the unused bits on the left.
14755   if (NarrowedUsedBits.countLeadingZeros())
14756     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
14757   // Check that the chunk of bits is completely used.
14758   return NarrowedUsedBits.isAllOnesValue();
14759 }
14760 
14761 /// Check whether or not \p First and \p Second are next to each other
14762 /// in memory. This means that there is no hole between the bits loaded
14763 /// by \p First and the bits loaded by \p Second.
14764 static bool areSlicesNextToEachOther(const LoadedSlice &First,
14765                                      const LoadedSlice &Second) {
14766   assert(First.Origin == Second.Origin && First.Origin &&
14767          "Unable to match different memory origins.");
14768   APInt UsedBits = First.getUsedBits();
14769   assert((UsedBits & Second.getUsedBits()) == 0 &&
14770          "Slices are not supposed to overlap.");
14771   UsedBits |= Second.getUsedBits();
14772   return areUsedBitsDense(UsedBits);
14773 }
14774 
/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  // Note the increment: Second becomes the new First on each iteration,
  // so the loop slides a window of two adjacent slices over the list.
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                                                           First = Second) {
    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    // A paired load replaces two individual loads: credit one load back.
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}
14834 
14835 /// Check the profitability of all involved LoadedSlice.
14836 /// Currently, it is considered profitable if there is exactly two
14837 /// involved slices (1) which are (2) next to each other in memory, and
14838 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
14839 ///
14840 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
14841 /// the elements themselves.
14842 ///
14843 /// FIXME: When the cost model will be mature enough, we can relax
14844 /// constraints (1) and (2).
14845 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14846                                 const APInt &UsedBits, bool ForCodeSize) {
14847   unsigned NumberOfSlices = LoadedSlices.size();
14848   if (StressLoadSlicing)
14849     return NumberOfSlices > 1;
14850 
14851   // Check (1).
14852   if (NumberOfSlices != 2)
14853     return false;
14854 
14855   // Check (2).
14856   if (!areUsedBitsDense(UsedBits))
14857     return false;
14858 
14859   // Check (3).
14860   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
14861   // The original code has one big load.
14862   OrigCost.Loads = 1;
14863   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
14864     const LoadedSlice &LS = LoadedSlices[CurrSlice];
14865     // Accumulate the cost of all the slices.
14866     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
14867     GlobalSlicingCost += SliceCost;
14868 
14869     // Account as cost in the original configuration the gain obtained
14870     // with the current slices.
14871     OrigCost.addSliceGain(LS);
14872   }
14873 
14874   // If the target supports paired load, adjust the cost accordingly.
14875   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
14876   return OrigCost > GlobalSlicingCost;
14877 }
14878 
/// If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
bool DAGCombiner::SliceUpLoad(SDNode *N) {
  // Slicing creates new (narrower) loads; only do it once legalization is
  // over so no illegal intermediate state has to be re-legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  LoadSDNode *LD = cast<LoadSDNode>(N);
  // Only simple, non-indexed, integer-valued loads can be sliced.
  if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
      !LD->getValueType(0).isInteger())
    return false;

  // Keep track of already used bits to detect overlapping values.
  // In that case, we will just abort the transformation.
  APInt UsedBits(LD->getValueSizeInBits(0), 0);

  SmallVector<LoadedSlice, 4> LoadedSlices;

  // Check if this load is used as several smaller chunks of bits.
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  // of computation for each trunc.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;

    SDNode *User = *UI;
    unsigned Shift = 0;

    // Check if this is a trunc(lshr).
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
        isa<ConstantSDNode>(User->getOperand(1))) {
      Shift = User->getConstantOperandVal(1);
      User = *User->use_begin();
    }

    // At this point, User is a TRUNCATE iff we encountered either trunc or
    // trunc(lshr). Any other user makes the load unsliceable.
    if (User->getOpcode() != ISD::TRUNCATE)
      return false;

    // The width of the type must be a power of 2 and greater than 8-bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted with a non-8-bits multiple, the slice
    // will be across several bytes. We do not support that.
    unsigned Width = User->getValueSizeInBits(0);
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return false;

    // Build the slice for this chain of computations.
    LoadedSlice LS(User, LD, Shift, &DAG);
    APInt CurrentUsedBits = LS.getUsedBits();

    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return false;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;

    // Check if the new slice would be legal.
    if (!LS.isLegal())
      return false;

    // Record the slice.
    LoadedSlices.push_back(LS);
  }

  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
    return false;

  ++SlicedLoads;

  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.

  // Prepare the argument for the new token factor for all the slices.
  SmallVector<SDValue, 8> ArgChains;
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    CombineTo(LSIt->Inst, SliceInst, true);
    // loadSlice may return a zext of the new load; step through it to
    // reach the load itself, whose chain result is needed below.
    if (SliceInst.getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }

  // Tie the chains of all the new loads together and redirect users of the
  // original load's chain to the token factor.
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  AddToWorklist(Chain.getNode());
  return true;
}
14980 
/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out.  If so, return the byte size being masked out
/// and the shift amount.
/// \return (0, 0) when the pattern does not match.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  // Number of bytes the AND clears out of the loaded value.
  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  // For narrowing to be valid, it must be the case that the load the
  // immediately preceding memory operation before the store.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() == ISD::TokenFactor &&
           SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use so they are no indirect dependencies.
    if (!LD->isOperandOf(Chain.getNode()))
      return Result;
  } else
    return Result; // Fail.

  // Pattern matched: report the masked byte count and the byte shift of
  // the masked region from the low end of the value.
  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}
15050 
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
/// \param MaskInfo (number of masked bytes, byte shift) as returned by
///        CheckForMaskedLoad.
/// \return The new narrow store, or an empty SDValue on failure.
static SDValue
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization (and the target doesn't explicitly think this is a bad idea).
  MVT VT = MVT::getIntegerVT(NumBytes * 8);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!DC->isTypeLegal(VT))
    return SDValue();
  if (St->getMemOperand() &&
      !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                              *St->getMemOperand()))
    return SDValue();

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On big-endian targets the masked bytes sit at the opposite end of the
  // stored value, so the byte offset must be mirrored.
  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL);
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset), NewAlign);
}
15113 
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (!ST->isSimple())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return NewST;

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return NewST;
  }

  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // The load must feed the op directly, share the store's pointer, and the
  // store chain must come straight from the load so nothing intervenes.
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, invert the mask so that in all three cases the set bits of
    // Imm mark the bits the operation actually changes.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only narrow when every changed bit fits inside the NewBW window.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      // Undo the earlier inversion so AND stores the original mask bits.
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrow load / op / store sequence.
      SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
15236 
15237 /// For a given floating point load / store pair, if the load value isn't used
15238 /// by any other operations, then consider transforming the pair to integer
15239 /// load / store operations if the target deems the transformation profitable.
15240 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
15241   StoreSDNode *ST  = cast<StoreSDNode>(N);
15242   SDValue Value = ST->getValue();
15243   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
15244       Value.hasOneUse()) {
15245     LoadSDNode *LD = cast<LoadSDNode>(Value);
15246     EVT VT = LD->getMemoryVT();
15247     if (!VT.isFloatingPoint() ||
15248         VT != ST->getMemoryVT() ||
15249         LD->isNonTemporal() ||
15250         ST->isNonTemporal() ||
15251         LD->getPointerInfo().getAddrSpace() != 0 ||
15252         ST->getPointerInfo().getAddrSpace() != 0)
15253       return SDValue();
15254 
15255     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
15256     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
15257         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
15258         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
15259         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
15260       return SDValue();
15261 
15262     unsigned LDAlign = LD->getAlignment();
15263     unsigned STAlign = ST->getAlignment();
15264     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
15265     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
15266     if (LDAlign < ABIAlign || STAlign < ABIAlign)
15267       return SDValue();
15268 
15269     SDValue NewLD =
15270         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
15271                     LD->getPointerInfo(), LDAlign);
15272 
15273     SDValue NewST =
15274         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
15275                      ST->getPointerInfo(), STAlign);
15276 
15277     AddToWorklist(NewLD.getNode());
15278     AddToWorklist(NewST.getNode());
15279     WorklistRemover DeadNodes(*this);
15280     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
15281     ++LdStFP2Int;
15282     return NewST;
15283   }
15284 
15285   return SDValue();
15286 }
15287 
15288 // This is a helper function for visitMUL to check the profitability
15289 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
15290 // MulNode is the original multiply, AddNode is (add x, c1),
15291 // and ConstNode is c2.
15292 //
15293 // If the (add x, c1) has multiple uses, we could increase
15294 // the number of adds if we make this transformation.
15295 // It would only be worth doing this if we can remove a
15296 // multiply in the process. Check for that here.
15297 // To illustrate:
15298 //     (A + c1) * c3
15299 //     (A + c2) * c3
15300 // We're checking for cases where we have common "c3 * A" expressions.
15301 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
15302                                               SDValue &AddNode,
15303                                               SDValue &ConstNode) {
15304   APInt Val;
15305 
15306   // If the add only has one use, this would be OK to do.
15307   if (AddNode.getNode()->hasOneUse())
15308     return true;
15309 
15310   // Walk all the users of the constant with which we're multiplying.
15311   for (SDNode *Use : ConstNode->uses()) {
15312     if (Use == MulNode) // This use is the one we're on right now. Skip it.
15313       continue;
15314 
15315     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
15316       SDNode *OtherOp;
15317       SDNode *MulVar = AddNode.getOperand(0).getNode();
15318 
15319       // OtherOp is what we're multiplying against the constant.
15320       if (Use->getOperand(0) == ConstNode)
15321         OtherOp = Use->getOperand(1).getNode();
15322       else
15323         OtherOp = Use->getOperand(0).getNode();
15324 
15325       // Check to see if multiply is with the same operand of our "add".
15326       //
15327       //     ConstNode  = CONST
15328       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
15329       //     ...
15330       //     AddNode  = (A + c1)  <-- MulVar is A.
15331       //         = AddNode * ConstNode   <-- current visiting instruction.
15332       //
15333       // If we make this transformation, we will have a common
15334       // multiply (ConstNode * A) that we can save.
15335       if (OtherOp == MulVar)
15336         return true;
15337 
15338       // Now check to see if a future expansion will give us a common
15339       // multiply.
15340       //
15341       //     ConstNode  = CONST
15342       //     AddNode    = (A + c1)
15343       //     ...   = AddNode * ConstNode <-- current visiting instruction.
15344       //     ...
15345       //     OtherOp = (A + c2)
15346       //     Use     = OtherOp * ConstNode <-- visiting Use.
15347       //
15348       // If we make this transformation, we will have a common
15349       // multiply (CONST * A) after we also do the same transformation
15350       // to the "t2" instruction.
15351       if (OtherOp->getOpcode() == ISD::ADD &&
15352           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
15353           OtherOp->getOperand(0).getNode() == MulVar)
15354         return true;
15355     }
15356   }
15357 
15358   // Didn't find a case where this would be profitable.
15359   return false;
15360 }
15361 
15362 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
15363                                          unsigned NumStores) {
15364   SmallVector<SDValue, 8> Chains;
15365   SmallPtrSet<const SDNode *, 8> Visited;
15366   SDLoc StoreDL(StoreNodes[0].MemNode);
15367 
15368   for (unsigned i = 0; i < NumStores; ++i) {
15369     Visited.insert(StoreNodes[i].MemNode);
15370   }
15371 
15372   // don't include nodes that are children or repeated nodes.
15373   for (unsigned i = 0; i < NumStores; ++i) {
15374     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
15375       Chains.push_back(StoreNodes[i].MemNode->getChain());
15376   }
15377 
15378   assert(Chains.size() > 0 && "Chain should have generated a chain");
15379   return DAG.getTokenFactor(StoreDL, Chains);
15380 }
15381 
/// Replace the first \p NumStores entries of \p StoreNodes with one merged
/// store. With \p UseVector the merged value is a wider vector built from the
/// individual values; otherwise it is a single wide integer assembled from
/// constant inputs (\p IsConstantSrc). \p UseTrunc requests a truncating
/// store when the wide integer type must be legalized to a larger one.
/// Returns true once CombineTo() has replaced all of the original stores.
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  // The latest Node in the DAG.
  SDLoc DL(StoreNodes[0].MemNode);

  TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
  unsigned SizeInBits = NumStores * ElementSizeBits;
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  // Choose the merged value type: a wider vector when merging as a vector,
  // otherwise one integer covering all of the stored bits.
  EVT StoreTy;
  if (UseVector) {
    unsigned Elts = NumStores * NumMemElts;
    // Get the type for the merged vector store.
    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
  } else
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);

  SDValue StoredVal;
  if (UseVector) {
    if (IsConstantSrc) {
      // Gather the constant operands, coercing each one to MemVT so it is a
      // valid BUILD_VECTOR / CONCAT_VECTORS operand.
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0; I != NumStores; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // If constant is of the wrong type, convert it now.
        if (MemVT != Val.getValueType()) {
          Val = peekThroughBitcasts(Val);
          // Deal with constants of wrong size.
          if (ElementSizeBits != Val.getValueSizeInBits()) {
            EVT IntMemVT =
                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
            if (isa<ConstantFPSDNode>(Val)) {
              // Not clear how to truncate FP values.
              return false;
            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
              Val = DAG.getConstant(C->getAPIntValue()
                                        .zextOrTrunc(Val.getValueSizeInBits())
                                        .zextOrTrunc(ElementSizeBits),
                                    SDLoc(C), IntMemVT);
          }
          // Make sure the now correctly-sized value is bitcast to the element
          // type the merged vector expects.
          Val = DAG.getBitcast(MemVT, Val);
        }
        BuildVector.push_back(Val);
      }
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, BuildVector);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = peekThroughBitcasts(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
        // type MemVT. If the underlying value is not the correct
        // type, but it is an extraction of an appropriate vector we
        // can recast Val to be of the correct type. This may require
        // converting between EXTRACT_VECTOR_ELT and
        // EXTRACT_SUBVECTOR.
        if ((MemVT != Val.getValueType()) &&
            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
          EVT MemVTScalarTy = MemVT.getScalarType();
          // We may need to add a bitcast here to get types to line up.
          if (MemVTScalarTy != Val.getValueType().getScalarType()) {
            Val = DAG.getBitcast(MemVT, Val);
          } else {
            unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
                                            : ISD::EXTRACT_VECTOR_ELT;
            SDValue Vec = Val.getOperand(0);
            SDValue Idx = Val.getOperand(1);
            Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
          }
        }
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      // On little-endian targets the first store supplies the lowest bits, so
      // walk the stores in reverse while shifting the accumulator left.
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      Val = peekThroughBitcasts(Val);
      StoreInt <<= ElementSizeBits;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF()
                        .bitcastToAPInt()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary give up for now.
        if (MemVT.getSizeInBits() != ElementSizeBits)
          return false;
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // make sure we use trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (!UseTrunc) {
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
  } else { // Must be realized as a trunc store
    // The merged integer type is illegal and will be promoted: materialize
    // the constant in the promoted type and truncate back down on store.
    EVT LegalizedStoredValTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlignment(),
        FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}
15538 
/// Fill \p StoreNodes with stores that may be mergeable with \p St: stores of
/// the same kind of source value (constant, single-use load, or vector
/// extract) whose address differs from St's only by a constant offset.
/// \p RootNode is set to the common chain ancestor under which the search
/// was performed.
void DAGCombiner::getStoreMergeCandidates(
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SDNode *&RootNode) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
  EVT MemVT = St->getMemoryVT();

  SDValue Val = peekThroughBitcasts(St->getValue());
  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return;

  // Classify the stored value; candidate stores must fall into the same
  // category as St.
  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
  bool IsLoadSrc = isa<LoadSDNode>(Val);
  BaseIndexOffset LBasePtr;
  // Match on loadbaseptr if relevant.
  EVT LoadVT;
  if (IsLoadSrc) {
    auto *Ld = cast<LoadSDNode>(Val);
    LBasePtr = BaseIndexOffset::match(Ld, DAG);
    LoadVT = Ld->getMemoryVT();
    // Load and store should be the same type.
    if (MemVT != LoadVT)
      return;
    // Loads must only have one use.
    if (!Ld->hasNUsesOfValue(1, 0))
      return;
    // The memory operands must not be volatile/indexed/atomic.
    // TODO: May be able to relax for unordered atomics (see D66309)
    if (!Ld->isSimple() || Ld->isIndexed())
      return;
  }
  // Returns true if Other is a store of the same category as St whose address
  // is a constant offset from St's base; on success Ptr/Offset describe
  // Other's address relative to BasePtr.
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    // The memory operands must not be volatile/indexed/atomic.
    // TODO: May be able to relax for unordered atomics (see D66309)
    if (!Other->isSimple() ||  Other->isIndexed())
      return false;
    // Don't mix temporal stores with non-temporal stores.
    if (St->isNonTemporal() != Other->isNonTemporal())
      return false;
    SDValue OtherBC = peekThroughBitcasts(Other->getValue());
    // Allow merging constants of different types as integers.
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                           : Other->getMemoryVT() != MemVT;
    if (IsLoadSrc) {
      if (NoTypeMatch)
        return false;
      // The Load's Base Ptr must also match
      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
        BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
        if (LoadVT != OtherLd->getMemoryVT())
          return false;
        // Loads must only have one use.
        if (!OtherLd->hasNUsesOfValue(1, 0))
          return false;
        // The memory operands must not be volatile/indexed/atomic.
        // TODO: May be able to relax for unordered atomics (see D66309)
        if (!OtherLd->isSimple() ||
            OtherLd->isIndexed())
          return false;
        // Don't mix temporal loads with non-temporal loads.
        if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
          return false;
        // Both loads must share the same base/index expression.
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
          return false;
      } else
        return false;
    }
    if (IsConstantSrc) {
      if (NoTypeMatch)
        return false;
      if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
        return false;
    }
    if (IsExtractVecSrc) {
      // Do not merge truncated stores here.
      if (Other->isTruncatingStore())
        return false;
      if (!MemVT.bitsEq(OtherBC.getValueType()))
        return false;
      if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
          OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
        return false;
    }
    Ptr = BaseIndexOffset::match(Other, DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };

  // Check if the pair of StoreNode and the RootNode already bailed out many
  // times (over the limit) in the dependence check; if so, skip the node to
  // avoid repeated expensive searches.
  auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
                                        SDNode *RootNode) -> bool {
    auto RootCount = StoreRootCountMap.find(StoreNode);
    if (RootCount != StoreRootCountMap.end() &&
        RootCount->second.first == RootNode &&
        RootCount->second.second > StoreMergeDependenceLimit)
      return true;
    return false;
  };

  // We are looking for a root node which is an ancestor to all mergable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load
  // which is always true for nonvolatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
  //
  // Root
  // |-------|-------|
  // Load    Load    Store3
  // |       |
  // Store1   Store2
  //
  // FIXME: We should be able to climb and
  // descend TokenFactors to find candidates as well.

  RootNode = St->getChain().getNode();

  // Bound the number of chain users inspected to keep compile time in check.
  unsigned NumNodesExplored = 0;
  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
    // The chain goes through a load: climb above it and look for stores
    // hanging off sibling loads of the same root.
    RootNode = Ldn->getChain().getNode();
    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
         I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
          if (I2.getOperandNo() == 0)
            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
              BaseIndexOffset Ptr;
              int64_t PtrDiff;
              if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
                  !OverLimitInDependenceCheck(OtherST, RootNode))
                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
            }
  } else
    // Otherwise every store chained directly to the root is a candidate.
    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
         I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
      if (I.getOperandNo() == 0)
        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
          BaseIndexOffset Ptr;
          int64_t PtrDiff;
          if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
              !OverLimitInDependenceCheck(OtherST, RootNode))
            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
        }
}
15692 
// We need to check that merging these stores does not cause a loop in
// the DAG. Any store candidate may depend on another candidate
// indirectly through its operand (we already consider dependencies
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
    SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs on the originating
  // stores from which worklist nodes come from in a similar way to
  // TokenFactor simplification.

  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 8> Worklist;

  // RootNode is a predecessor to all candidates so we need not search
  // past it. Add RootNode (peeking through TokenFactors). Do not count
  // these towards size check.

  Worklist.push_back(RootNode);
  while (!Worklist.empty()) {
    auto N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue; // Already present in Visited.
    // Seed Visited with the whole TokenFactor web above the candidates so
    // the predecessor search below stops there.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (SDValue Op : N->ops())
        Worklist.push_back(Op.getNode());
    }
  }

  // Don't count pruning nodes towards max.
  unsigned int Max = 1024 + Visited.size();
  // Search Ops of store candidates.
  for (unsigned i = 0; i < NumStores; ++i) {
    SDNode *N = StoreNodes[i].MemNode;
    // Of the 4 Store Operands:
    //   * Chain (Op 0) -> We have already considered these
    //                    in candidate selection and can be
    //                    safely ignored
    //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
    //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                       but aren't necessarily from the same base node, so
    //                       cycles possible (e.g. via indexed store).
    //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
    //               non-indexed stores). Not constant on all targets (e.g. ARM)
    //               and so can participate in a cycle.
    for (unsigned j = 1; j < N->getNumOperands(); ++j)
      Worklist.push_back(N->getOperand(j).getNode());
  }
  // Search through DAG. We can stop early if we find a store node.
  for (unsigned i = 0; i < NumStores; ++i)
    if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
                                     Max)) {
      // If the search bailed out, record the StoreNode and RootNode in the
      // StoreRootCountMap. If we have seen the pair many times over a limit,
      // we won't add the StoreNode into StoreNodes set again.
      if (Visited.size() >= Max) {
        auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
        if (RootCount.first == RootNode)
          RootCount.second++;
        else
          RootCount = {RootNode, 1};
      }
      return false;
    }
  return true;
}
15761 
15762 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
15763   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
15764     return false;
15765 
15766   // TODO: Extend this function to merge stores of scalable vectors.
15767   // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
15768   // store since we know <vscale x 16 x i8> is exactly twice as large as
15769   // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
15770   EVT MemVT = St->getMemoryVT();
15771   if (MemVT.isScalableVector())
15772     return false;
15773 
15774   int64_t ElementSizeBytes = MemVT.getStoreSize();
15775   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15776 
15777   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
15778     return false;
15779 
15780   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
15781       Attribute::NoImplicitFloat);
15782 
15783   // This function cannot currently deal with non-byte-sized memory sizes.
15784   if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
15785     return false;
15786 
15787   if (!MemVT.isSimple())
15788     return false;
15789 
15790   // Perform an early exit check. Do not bother looking at stored values that
15791   // are not constants, loads, or extracted vector elements.
15792   SDValue StoredVal = peekThroughBitcasts(St->getValue());
15793   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
15794   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
15795                        isa<ConstantFPSDNode>(StoredVal);
15796   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15797                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
15798   bool IsNonTemporalStore = St->isNonTemporal();
15799   bool IsNonTemporalLoad =
15800       IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
15801 
15802   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
15803     return false;
15804 
15805   SmallVector<MemOpLink, 8> StoreNodes;
15806   SDNode *RootNode;
15807   // Find potential store merge candidates by searching through chain sub-DAG
15808   getStoreMergeCandidates(St, StoreNodes, RootNode);
15809 
15810   // Check if there is anything to merge.
15811   if (StoreNodes.size() < 2)
15812     return false;
15813 
15814   // Sort the memory operands according to their distance from the
15815   // base pointer.
15816   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
15817     return LHS.OffsetFromBase < RHS.OffsetFromBase;
15818   });
15819 
15820   // Store Merge attempts to merge the lowest stores. This generally
15821   // works out as if successful, as the remaining stores are checked
15822   // after the first collection of stores is merged. However, in the
15823   // case that a non-mergeable store is found first, e.g., {p[-2],
15824   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
15825   // mergeable cases. To prevent this, we prune such stores from the
15826   // front of StoreNodes here.
15827 
15828   bool RV = false;
15829   while (StoreNodes.size() > 1) {
15830     size_t StartIdx = 0;
15831     while ((StartIdx + 1 < StoreNodes.size()) &&
15832            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
15833                StoreNodes[StartIdx + 1].OffsetFromBase)
15834       ++StartIdx;
15835 
15836     // Bail if we don't have enough candidates to merge.
15837     if (StartIdx + 1 >= StoreNodes.size())
15838       return RV;
15839 
15840     if (StartIdx)
15841       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
15842 
15843     // Scan the memory operations on the chain and find the first
15844     // non-consecutive store memory address.
15845     unsigned NumConsecutiveStores = 1;
15846     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
15847     // Check that the addresses are consecutive starting from the second
15848     // element in the list of stores.
15849     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
15850       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
15851       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15852         break;
15853       NumConsecutiveStores = i + 1;
15854     }
15855 
15856     if (NumConsecutiveStores < 2) {
15857       StoreNodes.erase(StoreNodes.begin(),
15858                        StoreNodes.begin() + NumConsecutiveStores);
15859       continue;
15860     }
15861 
15862     // The node with the lowest store address.
15863     LLVMContext &Context = *DAG.getContext();
15864     const DataLayout &DL = DAG.getDataLayout();
15865 
15866     // Store the constants into memory as one consecutive store.
15867     if (IsConstantSrc) {
15868       while (NumConsecutiveStores >= 2) {
15869         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15870         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15871         unsigned FirstStoreAlign = FirstInChain->getAlignment();
15872         unsigned LastLegalType = 1;
15873         unsigned LastLegalVectorType = 1;
15874         bool LastIntegerTrunc = false;
15875         bool NonZero = false;
15876         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
15877         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15878           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
15879           SDValue StoredVal = ST->getValue();
15880           bool IsElementZero = false;
15881           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
15882             IsElementZero = C->isNullValue();
15883           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
15884             IsElementZero = C->getConstantFPValue()->isNullValue();
15885           if (IsElementZero) {
15886             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
15887               FirstZeroAfterNonZero = i;
15888           }
15889           NonZero |= !IsElementZero;
15890 
15891           // Find a legal type for the constant store.
15892           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15893           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15894           bool IsFast = false;
15895 
15896           // Break early when size is too large to be legal.
15897           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15898             break;
15899 
15900           if (TLI.isTypeLegal(StoreTy) &&
15901               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15902               TLI.allowsMemoryAccess(Context, DL, StoreTy,
15903                                      *FirstInChain->getMemOperand(), &IsFast) &&
15904               IsFast) {
15905             LastIntegerTrunc = false;
15906             LastLegalType = i + 1;
15907             // Or check whether a truncstore is legal.
15908           } else if (TLI.getTypeAction(Context, StoreTy) ==
15909                      TargetLowering::TypePromoteInteger) {
15910             EVT LegalizedStoredValTy =
15911                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
15912             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15913                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15914                 TLI.allowsMemoryAccess(Context, DL, StoreTy,
15915                                        *FirstInChain->getMemOperand(),
15916                                        &IsFast) &&
15917                 IsFast) {
15918               LastIntegerTrunc = true;
15919               LastLegalType = i + 1;
15920             }
15921           }
15922 
15923           // We only use vectors if the constant is known to be zero or the
15924           // target allows it and the function is not marked with the
15925           // noimplicitfloat attribute.
15926           if ((!NonZero ||
15927                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
15928               !NoVectors) {
15929             // Find a legal type for the vector store.
15930             unsigned Elts = (i + 1) * NumMemElts;
15931             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15932             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
15933                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15934                 TLI.allowsMemoryAccess(
15935                     Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
15936                 IsFast)
15937               LastLegalVectorType = i + 1;
15938           }
15939         }
15940 
15941         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
15942         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
15943 
15944         // Check if we found a legal integer type that creates a meaningful
15945         // merge.
15946         if (NumElem < 2) {
15947           // We know that candidate stores are in order and of correct
15948           // shape. While there is no mergeable sequence from the
15949           // beginning one may start later in the sequence. The only
15950           // reason a merge of size N could have failed where another of
15951           // the same size would not have, is if the alignment has
15952           // improved or we've dropped a non-zero value. Drop as many
15953           // candidates as we can here.
15954           unsigned NumSkip = 1;
15955           while (
15956               (NumSkip < NumConsecutiveStores) &&
15957               (NumSkip < FirstZeroAfterNonZero) &&
15958               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15959             NumSkip++;
15960 
15961           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15962           NumConsecutiveStores -= NumSkip;
15963           continue;
15964         }
15965 
15966         // Check that we can merge these candidates without causing a cycle.
15967         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15968                                                       RootNode)) {
15969           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15970           NumConsecutiveStores -= NumElem;
15971           continue;
15972         }
15973 
15974         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
15975                                               UseVector, LastIntegerTrunc);
15976 
15977         // Remove merged stores for next iteration.
15978         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15979         NumConsecutiveStores -= NumElem;
15980       }
15981       continue;
15982     }
15983 
15984     // When extracting multiple vector elements, try to store them
15985     // in one vector store rather than a sequence of scalar stores.
15986     if (IsExtractVecSrc) {
15987       // Loop on Consecutive Stores on success.
15988       while (NumConsecutiveStores >= 2) {
15989         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15990         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15991         unsigned FirstStoreAlign = FirstInChain->getAlignment();
15992         unsigned NumStoresToMerge = 1;
15993         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15994           // Find a legal type for the vector store.
15995           unsigned Elts = (i + 1) * NumMemElts;
15996           EVT Ty =
15997               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15998           bool IsFast;
15999 
16000           // Break early when size is too large to be legal.
16001           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
16002             break;
16003 
16004           if (TLI.isTypeLegal(Ty) &&
16005               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
16006               TLI.allowsMemoryAccess(Context, DL, Ty,
16007                                      *FirstInChain->getMemOperand(), &IsFast) &&
16008               IsFast)
16009             NumStoresToMerge = i + 1;
16010         }
16011 
16012         // Check if we found a legal integer type creating a meaningful
16013         // merge.
16014         if (NumStoresToMerge < 2) {
16015           // We know that candidate stores are in order and of correct
16016           // shape. While there is no mergeable sequence from the
16017           // beginning one may start later in the sequence. The only
16018           // reason a merge of size N could have failed where another of
16019           // the same size would not have, is if the alignment has
16020           // improved. Drop as many candidates as we can here.
16021           unsigned NumSkip = 1;
16022           while (
16023               (NumSkip < NumConsecutiveStores) &&
16024               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16025             NumSkip++;
16026 
16027           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16028           NumConsecutiveStores -= NumSkip;
16029           continue;
16030         }
16031 
16032         // Check that we can merge these candidates without causing a cycle.
16033         if (!checkMergeStoreCandidatesForDependencies(
16034                 StoreNodes, NumStoresToMerge, RootNode)) {
16035           StoreNodes.erase(StoreNodes.begin(),
16036                            StoreNodes.begin() + NumStoresToMerge);
16037           NumConsecutiveStores -= NumStoresToMerge;
16038           continue;
16039         }
16040 
16041         RV |= MergeStoresOfConstantsOrVecElts(
16042             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
16043 
16044         StoreNodes.erase(StoreNodes.begin(),
16045                          StoreNodes.begin() + NumStoresToMerge);
16046         NumConsecutiveStores -= NumStoresToMerge;
16047       }
16048       continue;
16049     }
16050 
16051     // Below we handle the case of multiple consecutive stores that
16052     // come from multiple consecutive loads. We merge them into a single
16053     // wide load and a single wide store.
16054 
16055     // Look for load nodes which are used by the stored values.
16056     SmallVector<MemOpLink, 8> LoadNodes;
16057 
16058     // Find acceptable loads. Loads need to have the same chain (token factor),
16059     // must not be zext, volatile, indexed, and they must be consecutive.
16060     BaseIndexOffset LdBasePtr;
16061 
16062     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16063       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
16064       SDValue Val = peekThroughBitcasts(St->getValue());
16065       LoadSDNode *Ld = cast<LoadSDNode>(Val);
16066 
16067       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
16068       // If this is not the first ptr that we check.
16069       int64_t LdOffset = 0;
16070       if (LdBasePtr.getBase().getNode()) {
16071         // The base ptr must be the same.
16072         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
16073           break;
16074       } else {
16075         // Check that all other base pointers are the same as this one.
16076         LdBasePtr = LdPtr;
16077       }
16078 
16079       // We found a potential memory operand to merge.
16080       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
16081     }
16082 
16083     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
16084       // If we have load/store pair instructions and we only have two values,
16085       // don't bother merging.
16086       unsigned RequiredAlignment;
16087       if (LoadNodes.size() == 2 &&
16088           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
16089           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
16090         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
16091         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
16092         break;
16093       }
16094       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16095       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16096       unsigned FirstStoreAlign = FirstInChain->getAlignment();
16097       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
16098       unsigned FirstLoadAlign = FirstLoad->getAlignment();
16099 
16100       // Scan the memory operations on the chain and find the first
16101       // non-consecutive load memory address. These variables hold the index in
16102       // the store node array.
16103 
16104       unsigned LastConsecutiveLoad = 1;
16105 
16106       // This variable refers to the size and not index in the array.
16107       unsigned LastLegalVectorType = 1;
16108       unsigned LastLegalIntegerType = 1;
16109       bool isDereferenceable = true;
16110       bool DoIntegerTruncate = false;
16111       StartAddress = LoadNodes[0].OffsetFromBase;
16112       SDValue FirstChain = FirstLoad->getChain();
16113       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
16114         // All loads must share the same chain.
16115         if (LoadNodes[i].MemNode->getChain() != FirstChain)
16116           break;
16117 
16118         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
16119         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
16120           break;
16121         LastConsecutiveLoad = i;
16122 
16123         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
16124           isDereferenceable = false;
16125 
16126         // Find a legal type for the vector store.
16127         unsigned Elts = (i + 1) * NumMemElts;
16128         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16129 
16130         // Break early when size is too large to be legal.
16131         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
16132           break;
16133 
16134         bool IsFastSt, IsFastLd;
16135         if (TLI.isTypeLegal(StoreTy) &&
16136             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16137             TLI.allowsMemoryAccess(Context, DL, StoreTy,
16138                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
16139             IsFastSt &&
16140             TLI.allowsMemoryAccess(Context, DL, StoreTy,
16141                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
16142             IsFastLd) {
16143           LastLegalVectorType = i + 1;
16144         }
16145 
16146         // Find a legal type for the integer store.
16147         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
16148         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
16149         if (TLI.isTypeLegal(StoreTy) &&
16150             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16151             TLI.allowsMemoryAccess(Context, DL, StoreTy,
16152                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
16153             IsFastSt &&
16154             TLI.allowsMemoryAccess(Context, DL, StoreTy,
16155                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
16156             IsFastLd) {
16157           LastLegalIntegerType = i + 1;
16158           DoIntegerTruncate = false;
16159           // Or check whether a truncstore and extload is legal.
16160         } else if (TLI.getTypeAction(Context, StoreTy) ==
16161                    TargetLowering::TypePromoteInteger) {
16162           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
16163           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
16164               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
16165               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
16166                                  StoreTy) &&
16167               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
16168                                  StoreTy) &&
16169               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
16170               TLI.allowsMemoryAccess(Context, DL, StoreTy,
16171                                      *FirstInChain->getMemOperand(),
16172                                      &IsFastSt) &&
16173               IsFastSt &&
16174               TLI.allowsMemoryAccess(Context, DL, StoreTy,
16175                                      *FirstLoad->getMemOperand(), &IsFastLd) &&
16176               IsFastLd) {
16177             LastLegalIntegerType = i + 1;
16178             DoIntegerTruncate = true;
16179           }
16180         }
16181       }
16182 
16183       // Only use vector types if the vector type is larger than the integer
16184       // type. If they are the same, use integers.
16185       bool UseVectorTy =
16186           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
16187       unsigned LastLegalType =
16188           std::max(LastLegalVectorType, LastLegalIntegerType);
16189 
16190       // We add +1 here because the LastXXX variables refer to location while
16191       // the NumElem refers to array/index size.
16192       unsigned NumElem =
16193           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
16194       NumElem = std::min(LastLegalType, NumElem);
16195 
16196       if (NumElem < 2) {
16197         // We know that candidate stores are in order and of correct
16198         // shape. While there is no mergeable sequence from the
16199         // beginning one may start later in the sequence. The only
16200         // reason a merge of size N could have failed where another of
16201         // the same size would not have is if the alignment or either
16202         // the load or store has improved. Drop as many candidates as we
16203         // can here.
16204         unsigned NumSkip = 1;
16205         while ((NumSkip < LoadNodes.size()) &&
16206                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
16207                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16208           NumSkip++;
16209         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16210         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
16211         NumConsecutiveStores -= NumSkip;
16212         continue;
16213       }
16214 
16215       // Check that we can merge these candidates without causing a cycle.
16216       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
16217                                                     RootNode)) {
16218         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16219         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16220         NumConsecutiveStores -= NumElem;
16221         continue;
16222       }
16223 
16224       // Find if it is better to use vectors or integers to load and store
16225       // to memory.
16226       EVT JointMemOpVT;
16227       if (UseVectorTy) {
16228         // Find a legal type for the vector store.
16229         unsigned Elts = NumElem * NumMemElts;
16230         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16231       } else {
16232         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
16233         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
16234       }
16235 
16236       SDLoc LoadDL(LoadNodes[0].MemNode);
16237       SDLoc StoreDL(StoreNodes[0].MemNode);
16238 
16239       // The merged loads are required to have the same incoming chain, so
16240       // using the first's chain is acceptable.
16241 
16242       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
16243       AddToWorklist(NewStoreChain.getNode());
16244 
16245       MachineMemOperand::Flags LdMMOFlags =
16246           isDereferenceable ? MachineMemOperand::MODereferenceable
16247                             : MachineMemOperand::MONone;
16248       if (IsNonTemporalLoad)
16249         LdMMOFlags |= MachineMemOperand::MONonTemporal;
16250 
16251       MachineMemOperand::Flags StMMOFlags =
16252           IsNonTemporalStore ? MachineMemOperand::MONonTemporal
16253                              : MachineMemOperand::MONone;
16254 
16255       SDValue NewLoad, NewStore;
16256       if (UseVectorTy || !DoIntegerTruncate) {
16257         NewLoad =
16258             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
16259                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16260                         FirstLoadAlign, LdMMOFlags);
16261         NewStore = DAG.getStore(
16262             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
16263             FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
16264       } else { // This must be the truncstore/extload case
16265         EVT ExtendedTy =
16266             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
16267         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
16268                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
16269                                  FirstLoad->getPointerInfo(), JointMemOpVT,
16270                                  FirstLoadAlign, LdMMOFlags);
16271         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
16272                                      FirstInChain->getBasePtr(),
16273                                      FirstInChain->getPointerInfo(),
16274                                      JointMemOpVT, FirstInChain->getAlignment(),
16275                                      FirstInChain->getMemOperand()->getFlags());
16276       }
16277 
16278       // Transfer chain users from old loads to the new load.
16279       for (unsigned i = 0; i < NumElem; ++i) {
16280         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
16281         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
16282                                       SDValue(NewLoad.getNode(), 1));
16283       }
16284 
16285       // Replace the all stores with the new store. Recursively remove
16286       // corresponding value if its no longer used.
16287       for (unsigned i = 0; i < NumElem; ++i) {
16288         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
16289         CombineTo(StoreNodes[i].MemNode, NewStore);
16290         if (Val.getNode()->use_empty())
16291           recursivelyDeleteUnusedNodes(Val.getNode());
16292       }
16293 
16294       RV = true;
16295       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16296       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16297       NumConsecutiveStores -= NumElem;
16298     }
16299   }
16300   return RV;
16301 }
16302 
16303 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
16304   SDLoc SL(ST);
16305   SDValue ReplStore;
16306 
16307   // Replace the chain to avoid dependency.
16308   if (ST->isTruncatingStore()) {
16309     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
16310                                   ST->getBasePtr(), ST->getMemoryVT(),
16311                                   ST->getMemOperand());
16312   } else {
16313     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
16314                              ST->getMemOperand());
16315   }
16316 
16317   // Create token to keep both nodes around.
16318   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
16319                               MVT::Other, ST->getChain(), ReplStore);
16320 
16321   // Make sure the new and old chains are cleaned up.
16322   AddToWorklist(Token.getNode());
16323 
16324   // Don't add users to work list.
16325   return CombineTo(ST, Token, false);
16326 }
16327 
16328 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
16329   SDValue Value = ST->getValue();
16330   if (Value.getOpcode() == ISD::TargetConstantFP)
16331     return SDValue();
16332 
16333   if (!ISD::isNormalStore(ST))
16334     return SDValue();
16335 
16336   SDLoc DL(ST);
16337 
16338   SDValue Chain = ST->getChain();
16339   SDValue Ptr = ST->getBasePtr();
16340 
16341   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
16342 
16343   // NOTE: If the original store is volatile, this transform must not increase
16344   // the number of stores.  For example, on x86-32 an f64 can be stored in one
16345   // processor operation but an i64 (which is not legal) requires two.  So the
16346   // transform should not be done in this case.
16347 
16348   SDValue Tmp;
16349   switch (CFP->getSimpleValueType(0).SimpleTy) {
16350   default:
16351     llvm_unreachable("Unknown FP type");
16352   case MVT::f16:    // We don't do this for these yet.
16353   case MVT::f80:
16354   case MVT::f128:
16355   case MVT::ppcf128:
16356     return SDValue();
16357   case MVT::f32:
16358     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
16359         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16360       ;
16361       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
16362                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
16363                             MVT::i32);
16364       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
16365     }
16366 
16367     return SDValue();
16368   case MVT::f64:
16369     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
16370          ST->isSimple()) ||
16371         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
16372       ;
16373       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
16374                             getZExtValue(), SDLoc(CFP), MVT::i64);
16375       return DAG.getStore(Chain, DL, Tmp,
16376                           Ptr, ST->getMemOperand());
16377     }
16378 
16379     if (ST->isSimple() &&
16380         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16381       // Many FP stores are not made apparent until after legalize, e.g. for
16382       // argument passing.  Since this is so common, custom legalize the
16383       // 64-bit integer store into two 32-bit stores.
16384       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
16385       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
16386       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
16387       if (DAG.getDataLayout().isBigEndian())
16388         std::swap(Lo, Hi);
16389 
16390       unsigned Alignment = ST->getAlignment();
16391       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16392       AAMDNodes AAInfo = ST->getAAInfo();
16393 
16394       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16395                                  ST->getAlignment(), MMOFlags, AAInfo);
16396       Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL);
16397       Alignment = MinAlign(Alignment, 4U);
16398       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
16399                                  ST->getPointerInfo().getWithOffset(4),
16400                                  Alignment, MMOFlags, AAInfo);
16401       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
16402                          St0, St1);
16403     }
16404 
16405     return SDValue();
16406   }
16407 }
16408 
/// Top-level combine for STORE nodes. Applies, in order: bitcast-store
/// simplification, dead-store elimination, alignment inference, FP<->int
/// load/store pairing, byte-swap store matching, chain improvement,
/// truncstore demanded-bits simplification, redundant/overlapping store
/// elimination, store merging, indexed-store formation, and FP-constant
/// store replacement. The order matters: several steps use CombineTo and
/// may delete N, which is why DELETED_NODE is re-checked below.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // If the store is volatile, we only want to change the store type if the
    // resulting store is legal. Otherwise we might increase the number of
    // memory accesses. We don't care if the original type was legal or not
    // as we assume software couldn't rely on the number of accesses of an
    // illegal type.
    // TODO: May be able to relax for unordered atomics (see D66309)
    if (((!LegalOperations && ST->isSimple()) ||
         TLI.isOperationLegal(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
                                     DAG, *ST->getMemOperand())) {
      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                          ST->getMemOperand());
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
        // getTruncStore with the same memory VT CSEs back to N, so this only
        // updates the alignment on the existing node.
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N as we are only refining the alignment
        assert(NewStore.getNode() == N);
        (void)NewStore;
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  // Try transforming several stores into STORE (BSWAP).
  if (SDValue Store = MatchStoreCombine(ST))
    return Store;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // findBetterNeighborChains may have replaced the chain operand in place;
    // refresh our cached copy before continuing.
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger() &&
      (!isa<ConstantSDNode>(Value) ||
       !cast<ConstantSDNode>(Value)->isOpaque())) {
    // Only the low MemoryVT bits of Value are actually stored.
    APInt TruncDemandedBits =
        APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                             ST->getMemoryVT().getScalarSizeInBits());

    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    AddToWorklist(Value.getNode());
    if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
                               ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted) SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  // TODO: Can relax for unordered atomics (see D66309)
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && ST->isSimple() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // TODO: Can relax for unordered atomics (see D66309)
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && ST->isSimple() &&
        ST1->isUnindexed() && ST1->isSimple()) {
      if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
          ST->getMemoryVT() == ST1->getMemoryVT()) {
        // If this is a store followed by a store with the same value to the
        // same location, then the store is dead/noop.
        return Chain;
      }

      // ST1 has exactly one chain use (this store), so removing or mutating
      // it cannot affect other chains.
      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef()) {
        const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
        const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
        unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
        unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
        // If this is a store who's preceding store to a subset of the current
        // location and no one other node is chained to that store we can
        // effectively drop the store. Do not remove stores to undef as they may
        // be used as data sinks.
        if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
          CombineTo(ST1, ST1->getChain());
          return SDValue();
        }

        // If ST stores to a subset of preceding store's write set, we may be
        // able to fold ST's value into the preceding stored value. As we know
        // the other uses of ST1's chain are unconcerned with ST, this folding
        // will not affect those nodes.
        int64_t BitOffset;
        if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
                               BitOffset)) {
          SDValue ChainValue = ST1->getValue();
          if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
            if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
              APInt Val = C1->getAPIntValue();
              APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
              // FIXME: Handle Big-endian mode.
              if (!DAG.getDataLayout().isBigEndian()) {
                // Splice ST's constant into ST1's constant at the overlapping
                // bit range, then drop ST entirely in favor of updated ST1.
                Val.insertBits(InsertVal, BitOffset);
                SDValue NewSDVal =
                    DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
                                    C1->isTargetOpcode(), C1->isOpaque());
                SDNode *NewST1 = DAG.UpdateNodeOperands(
                    ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
                    ST1->getOperand(3));
                return CombineTo(ST, SDValue(NewST1, 0));
              }
            }
          }
        } // End ST subset of ST1 case.
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Always perform this optimization before types are legal. If the target
  // prefers, also try this after legalization to catch stores that were created
  // by intrinsics or other nodes.
  if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
    while (true) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  return ReduceLoadOpStoreWidth(N);
}
16620 
16621 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
16622   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
16623   if (!LifetimeEnd->hasOffset())
16624     return SDValue();
16625 
16626   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
16627                                         LifetimeEnd->getOffset(), false);
16628 
16629   // We walk up the chains to find stores.
16630   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
16631   while (!Chains.empty()) {
16632     SDValue Chain = Chains.back();
16633     Chains.pop_back();
16634     if (!Chain.hasOneUse())
16635       continue;
16636     switch (Chain.getOpcode()) {
16637     case ISD::TokenFactor:
16638       for (unsigned Nops = Chain.getNumOperands(); Nops;)
16639         Chains.push_back(Chain.getOperand(--Nops));
16640       break;
16641     case ISD::LIFETIME_START:
16642     case ISD::LIFETIME_END:
16643       // We can forward past any lifetime start/end that can be proven not to
16644       // alias the node.
16645       if (!isAlias(Chain.getNode(), N))
16646         Chains.push_back(Chain.getOperand(0));
16647       break;
16648     case ISD::STORE: {
16649       StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
16650       // TODO: Can relax for unordered atomics (see D66309)
16651       if (!ST->isSimple() || ST->isIndexed())
16652         continue;
16653       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
16654       // If we store purely within object bounds just before its lifetime ends,
16655       // we can remove the store.
16656       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
16657                                    ST->getMemoryVT().getStoreSizeInBits())) {
16658         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
16659                    dbgs() << "\nwithin LIFETIME_END of : ";
16660                    LifetimeEndBase.dump(); dbgs() << "\n");
16661         CombineTo(ST, ST->getChain());
16662         return SDValue(N, 0);
16663       }
16664     }
16665     }
16666   }
16667   return SDValue();
16668 }
16669 
16670 /// For the instruction sequence of store below, F and I values
16671 /// are bundled together as an i64 value before being stored into memory.
16672 /// Sometimes it is more efficent to generate separate stores for F and I,
16673 /// which can remove the bitwise instructions or sink them to colder places.
16674 ///
16675 ///   (store (or (zext (bitcast F to i32) to i64),
16676 ///              (shl (zext I to i64), 32)), addr)  -->
16677 ///   (store F, addr) and (store I, addr+4)
16678 ///
16679 /// Similarly, splitting for other merged store can also be beneficial, like:
16680 /// For pair of {i32, i32}, i64 store --> two i32 stores.
16681 /// For pair of {i32, i16}, i64 store --> two i32 stores.
16682 /// For pair of {i16, i16}, i32 store --> two i16 stores.
16683 /// For pair of {i16, i8},  i32 store --> two i16 stores.
16684 /// For pair of {i8, i8},   i16 store --> two i8 stores.
16685 ///
16686 /// We allow each target to determine specifically which kind of splitting is
16687 /// supported.
16688 ///
16689 /// The store patterns are commonly seen from the simple code snippet below
16690 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
16691 ///   void goo(const std::pair<int, float> &);
16692 ///   hoo() {
16693 ///     ...
16694 ///     goo(std::make_pair(tmp, ftmp));
16695 ///     ...
16696 ///   }
16697 ///
16698 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
16699   if (OptLevel == CodeGenOpt::None)
16700     return SDValue();
16701 
16702   // Can't change the number of memory accesses for a volatile store or break
16703   // atomicity for an atomic one.
16704   if (!ST->isSimple())
16705     return SDValue();
16706 
16707   SDValue Val = ST->getValue();
16708   SDLoc DL(ST);
16709 
16710   // Match OR operand.
16711   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
16712     return SDValue();
16713 
16714   // Match SHL operand and get Lower and Higher parts of Val.
16715   SDValue Op1 = Val.getOperand(0);
16716   SDValue Op2 = Val.getOperand(1);
16717   SDValue Lo, Hi;
16718   if (Op1.getOpcode() != ISD::SHL) {
16719     std::swap(Op1, Op2);
16720     if (Op1.getOpcode() != ISD::SHL)
16721       return SDValue();
16722   }
16723   Lo = Op2;
16724   Hi = Op1.getOperand(0);
16725   if (!Op1.hasOneUse())
16726     return SDValue();
16727 
16728   // Match shift amount to HalfValBitSize.
16729   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
16730   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
16731   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
16732     return SDValue();
16733 
16734   // Lo and Hi are zero-extended from int with size less equal than 32
16735   // to i64.
16736   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
16737       !Lo.getOperand(0).getValueType().isScalarInteger() ||
16738       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
16739       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
16740       !Hi.getOperand(0).getValueType().isScalarInteger() ||
16741       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
16742     return SDValue();
16743 
16744   // Use the EVT of low and high parts before bitcast as the input
16745   // of target query.
16746   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
16747                   ? Lo.getOperand(0).getValueType()
16748                   : Lo.getValueType();
16749   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
16750                    ? Hi.getOperand(0).getValueType()
16751                    : Hi.getValueType();
16752   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
16753     return SDValue();
16754 
16755   // Start to split store.
16756   unsigned Alignment = ST->getAlignment();
16757   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16758   AAMDNodes AAInfo = ST->getAAInfo();
16759 
16760   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
16761   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
16762   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
16763   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
16764 
16765   SDValue Chain = ST->getChain();
16766   SDValue Ptr = ST->getBasePtr();
16767   // Lower value store.
16768   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16769                              ST->getAlignment(), MMOFlags, AAInfo);
16770   Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL);
16771   // Higher value store.
16772   SDValue St1 =
16773       DAG.getStore(St0, DL, Hi, Ptr,
16774                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
16775                    Alignment / 2, MMOFlags, AAInfo);
16776   return St1;
16777 }
16778 
16779 /// Convert a disguised subvector insertion into a shuffle:
16780 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
16781   assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
16782          "Expected extract_vector_elt");
16783   SDValue InsertVal = N->getOperand(1);
16784   SDValue Vec = N->getOperand(0);
16785 
16786   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
16787   // InsIndex)
16788   //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
16789   //   CONCAT_VECTORS.
16790   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
16791       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16792       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
16793     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
16794     ArrayRef<int> Mask = SVN->getMask();
16795 
16796     SDValue X = Vec.getOperand(0);
16797     SDValue Y = Vec.getOperand(1);
16798 
16799     // Vec's operand 0 is using indices from 0 to N-1 and
16800     // operand 1 from N to 2N - 1, where N is the number of
16801     // elements in the vectors.
16802     SDValue InsertVal0 = InsertVal.getOperand(0);
16803     int ElementOffset = -1;
16804 
16805     // We explore the inputs of the shuffle in order to see if we find the
16806     // source of the extract_vector_elt. If so, we can use it to modify the
16807     // shuffle rather than perform an insert_vector_elt.
16808     SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
16809     ArgWorkList.emplace_back(Mask.size(), Y);
16810     ArgWorkList.emplace_back(0, X);
16811 
16812     while (!ArgWorkList.empty()) {
16813       int ArgOffset;
16814       SDValue ArgVal;
16815       std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
16816 
16817       if (ArgVal == InsertVal0) {
16818         ElementOffset = ArgOffset;
16819         break;
16820       }
16821 
16822       // Peek through concat_vector.
16823       if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
16824         int CurrentArgOffset =
16825             ArgOffset + ArgVal.getValueType().getVectorNumElements();
16826         int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
16827         for (SDValue Op : reverse(ArgVal->ops())) {
16828           CurrentArgOffset -= Step;
16829           ArgWorkList.emplace_back(CurrentArgOffset, Op);
16830         }
16831 
16832         // Make sure we went through all the elements and did not screw up index
16833         // computation.
16834         assert(CurrentArgOffset == ArgOffset);
16835       }
16836     }
16837 
16838     if (ElementOffset != -1) {
16839       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
16840 
16841       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
16842       NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
16843       assert(NewMask[InsIndex] <
16844                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
16845              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
16846 
16847       SDValue LegalShuffle =
16848               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
16849                                           Y, NewMask, DAG);
16850       if (LegalShuffle)
16851         return LegalShuffle;
16852     }
16853   }
16854 
16855   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
16856   // bitcast(shuffle (bitcast V), (extended X), Mask)
16857   // Note: We do not use an insert_subvector node because that requires a
16858   // legal subvector type.
16859   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
16860       !InsertVal.getOperand(0).getValueType().isVector())
16861     return SDValue();
16862 
16863   SDValue SubVec = InsertVal.getOperand(0);
16864   SDValue DestVec = N->getOperand(0);
16865   EVT SubVecVT = SubVec.getValueType();
16866   EVT VT = DestVec.getValueType();
16867   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
16868   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
16869   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
16870 
16871   // Step 1: Create a shuffle mask that implements this insert operation. The
16872   // vector that we are inserting into will be operand 0 of the shuffle, so
16873   // those elements are just 'i'. The inserted subvector is in the first
16874   // positions of operand 1 of the shuffle. Example:
16875   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
16876   SmallVector<int, 16> Mask(NumMaskVals);
16877   for (unsigned i = 0; i != NumMaskVals; ++i) {
16878     if (i / NumSrcElts == InsIndex)
16879       Mask[i] = (i % NumSrcElts) + NumMaskVals;
16880     else
16881       Mask[i] = i;
16882   }
16883 
16884   // Bail out if the target can not handle the shuffle we want to create.
16885   EVT SubVecEltVT = SubVecVT.getVectorElementType();
16886   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
16887   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
16888     return SDValue();
16889 
16890   // Step 2: Create a wide vector from the inserted source vector by appending
16891   // undefined elements. This is the same size as our destination vector.
16892   SDLoc DL(N);
16893   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
16894   ConcatOps[0] = SubVec;
16895   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
16896 
16897   // Step 3: Shuffle in the padded subvector.
16898   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
16899   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
16900   AddToWorklist(PaddedSubV.getNode());
16901   AddToWorklist(DestVecBC.getNode());
16902   AddToWorklist(Shuf.getNode());
16903   return DAG.getBitcast(VT, Shuf);
16904 }
16905 
16906 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
16907   SDValue InVec = N->getOperand(0);
16908   SDValue InVal = N->getOperand(1);
16909   SDValue EltNo = N->getOperand(2);
16910   SDLoc DL(N);
16911 
16912   EVT VT = InVec.getValueType();
16913   unsigned NumElts = VT.getVectorNumElements();
16914   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16915 
16916   // Insert into out-of-bounds element is undefined.
16917   if (IndexC && IndexC->getZExtValue() >= VT.getVectorNumElements())
16918     return DAG.getUNDEF(VT);
16919 
16920   // Remove redundant insertions:
16921   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
16922   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16923       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
16924     return InVec;
16925 
16926   if (!IndexC) {
16927     // If this is variable insert to undef vector, it might be better to splat:
16928     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
16929     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
16930       SmallVector<SDValue, 8> Ops(NumElts, InVal);
16931       return DAG.getBuildVector(VT, DL, Ops);
16932     }
16933     return SDValue();
16934   }
16935 
16936   // We must know which element is being inserted for folds below here.
16937   unsigned Elt = IndexC->getZExtValue();
16938   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
16939     return Shuf;
16940 
16941   // Canonicalize insert_vector_elt dag nodes.
16942   // Example:
16943   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
16944   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
16945   //
16946   // Do this only if the child insert_vector node has one use; also
16947   // do this only if indices are both constants and Idx1 < Idx0.
16948   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
16949       && isa<ConstantSDNode>(InVec.getOperand(2))) {
16950     unsigned OtherElt = InVec.getConstantOperandVal(2);
16951     if (Elt < OtherElt) {
16952       // Swap nodes.
16953       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16954                                   InVec.getOperand(0), InVal, EltNo);
16955       AddToWorklist(NewOp.getNode());
16956       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
16957                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
16958     }
16959   }
16960 
16961   // If we can't generate a legal BUILD_VECTOR, exit
16962   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
16963     return SDValue();
16964 
16965   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
16966   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
16967   // vector elements.
16968   SmallVector<SDValue, 8> Ops;
16969   // Do not combine these two vectors if the output vector will not replace
16970   // the input vector.
16971   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
16972     Ops.append(InVec.getNode()->op_begin(),
16973                InVec.getNode()->op_end());
16974   } else if (InVec.isUndef()) {
16975     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
16976   } else {
16977     return SDValue();
16978   }
16979   assert(Ops.size() == NumElts && "Unexpected vector size");
16980 
16981   // Insert the element
16982   if (Elt < Ops.size()) {
16983     // All the operands of BUILD_VECTOR must have the same type;
16984     // we enforce that here.
16985     EVT OpVT = Ops[0].getValueType();
16986     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
16987   }
16988 
16989   // Return the new vector
16990   return DAG.getBuildVector(VT, DL, Ops);
16991 }
16992 
/// Replace (extract_vector_elt (load addr), idx) with a narrow scalar load of
/// just the extracted element, when legal and profitable. EVE is the
/// extract_vector_elt node; OriginalLoad is the vector load feeding it. The
/// original load's chain users are rewired to the new load's chain.
/// Returns SDValue(EVE, 0) on success (the replacement has already been
/// performed via ReplaceAllUsesOfValuesWith), or an empty SDValue on failure.
SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                                  SDValue EltNo,
                                                  LoadSDNode *OriginalLoad) {
  assert(OriginalLoad->isSimple());

  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  // Bail if the element load would need more alignment than the original load
  // provides, or if the target cannot load the element type at all.
  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
    ISD::NON_EXTLOAD : ISD::EXTLOAD;
  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
    return SDValue();

  Align = NewAlign;

  // Compute the address of the extracted element: base + idx * eltsize.
  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: fold the byte offset and keep precise pointer info.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    // Discard the pointer info except the address space because the memory
    // operand can't represent this new access since the offset is variable.
    MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
  }
  NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
                          OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
                          Align, OriginalLoad->getMemOperand()->getFlags(),
                          OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Same-width or narrower result: load the element, then truncate or
    // bitcast to the expected result type.
    Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
                       MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
                       OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  // Replace both the extracted value and the original load's chain result.
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklistWithUsers(Load.getNode());
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}
17077 
17078 /// Transform a vector binary operation into a scalar binary operation by moving
17079 /// the math/logic after an extract element of a vector.
17080 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
17081                                        bool LegalOperations) {
17082   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17083   SDValue Vec = ExtElt->getOperand(0);
17084   SDValue Index = ExtElt->getOperand(1);
17085   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17086   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
17087       Vec.getNode()->getNumValues() != 1)
17088     return SDValue();
17089 
17090   // Targets may want to avoid this to prevent an expensive register transfer.
17091   if (!TLI.shouldScalarizeBinop(Vec))
17092     return SDValue();
17093 
17094   // Extracting an element of a vector constant is constant-folded, so this
17095   // transform is just replacing a vector op with a scalar op while moving the
17096   // extract.
17097   SDValue Op0 = Vec.getOperand(0);
17098   SDValue Op1 = Vec.getOperand(1);
17099   if (isAnyConstantBuildVector(Op0, true) ||
17100       isAnyConstantBuildVector(Op1, true)) {
17101     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
17102     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
17103     SDLoc DL(ExtElt);
17104     EVT VT = ExtElt->getValueType(0);
17105     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
17106     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
17107     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
17108   }
17109 
17110   return SDValue();
17111 }
17112 
17113 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
17114   SDValue VecOp = N->getOperand(0);
17115   SDValue Index = N->getOperand(1);
17116   EVT ScalarVT = N->getValueType(0);
17117   EVT VecVT = VecOp.getValueType();
17118   if (VecOp.isUndef())
17119     return DAG.getUNDEF(ScalarVT);
17120 
17121   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
17122   //
17123   // This only really matters if the index is non-constant since other combines
17124   // on the constant elements already work.
17125   SDLoc DL(N);
17126   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
17127       Index == VecOp.getOperand(2)) {
17128     SDValue Elt = VecOp.getOperand(1);
17129     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
17130   }
17131 
17132   // (vextract (scalar_to_vector val, 0) -> val
17133   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17134     // Only 0'th element of SCALAR_TO_VECTOR is defined.
17135     if (DAG.isKnownNeverZero(Index))
17136       return DAG.getUNDEF(ScalarVT);
17137 
17138     // Check if the result type doesn't match the inserted element type. A
17139     // SCALAR_TO_VECTOR may truncate the inserted element and the
17140     // EXTRACT_VECTOR_ELT may widen the extracted vector.
17141     SDValue InOp = VecOp.getOperand(0);
17142     if (InOp.getValueType() != ScalarVT) {
17143       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
17144       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
17145     }
17146     return InOp;
17147   }
17148 
17149   // extract_vector_elt of out-of-bounds element -> UNDEF
17150   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17151   unsigned NumElts = VecVT.getVectorNumElements();
17152   if (IndexC && IndexC->getAPIntValue().uge(NumElts))
17153     return DAG.getUNDEF(ScalarVT);
17154 
17155   // extract_vector_elt (build_vector x, y), 1 -> y
17156   if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
17157       TLI.isTypeLegal(VecVT) &&
17158       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
17159     SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
17160     EVT InEltVT = Elt.getValueType();
17161 
17162     // Sometimes build_vector's scalar input types do not match result type.
17163     if (ScalarVT == InEltVT)
17164       return Elt;
17165 
17166     // TODO: It may be useful to truncate if free if the build_vector implicitly
17167     // converts.
17168   }
17169 
17170   // TODO: These transforms should not require the 'hasOneUse' restriction, but
17171   // there are regressions on multiple targets without it. We can end up with a
17172   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
17173   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
17174       VecOp.hasOneUse()) {
17175     // The vector index of the LSBs of the source depend on the endian-ness.
17176     bool IsLE = DAG.getDataLayout().isLittleEndian();
17177     unsigned ExtractIndex = IndexC->getZExtValue();
17178     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
17179     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
17180     SDValue BCSrc = VecOp.getOperand(0);
17181     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
17182       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
17183 
17184     if (LegalTypes && BCSrc.getValueType().isInteger() &&
17185         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17186       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
17187       // trunc i64 X to i32
17188       SDValue X = BCSrc.getOperand(0);
17189       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
17190              "Extract element and scalar to vector can't change element type "
17191              "from FP to integer.");
17192       unsigned XBitWidth = X.getValueSizeInBits();
17193       unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
17194       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
17195 
17196       // An extract element return value type can be wider than its vector
17197       // operand element type. In that case, the high bits are undefined, so
17198       // it's possible that we may need to extend rather than truncate.
17199       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
17200         assert(XBitWidth % VecEltBitWidth == 0 &&
17201                "Scalar bitwidth must be a multiple of vector element bitwidth");
17202         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
17203       }
17204     }
17205   }
17206 
17207   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
17208     return BO;
17209 
17210   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
17211   // We only perform this optimization before the op legalization phase because
17212   // we may introduce new vector instructions which are not backed by TD
17213   // patterns. For example on AVX, extracting elements from a wide vector
17214   // without using extract_subvector. However, if we can find an underlying
17215   // scalar value, then we can always use that.
17216   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
17217     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
17218     // Find the new index to extract from.
17219     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
17220 
17221     // Extracting an undef index is undef.
17222     if (OrigElt == -1)
17223       return DAG.getUNDEF(ScalarVT);
17224 
17225     // Select the right vector half to extract from.
17226     SDValue SVInVec;
17227     if (OrigElt < (int)NumElts) {
17228       SVInVec = VecOp.getOperand(0);
17229     } else {
17230       SVInVec = VecOp.getOperand(1);
17231       OrigElt -= NumElts;
17232     }
17233 
17234     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
17235       SDValue InOp = SVInVec.getOperand(OrigElt);
17236       if (InOp.getValueType() != ScalarVT) {
17237         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
17238         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
17239       }
17240 
17241       return InOp;
17242     }
17243 
17244     // FIXME: We should handle recursing on other vector shuffles and
17245     // scalar_to_vector here as well.
17246 
17247     if (!LegalOperations ||
17248         // FIXME: Should really be just isOperationLegalOrCustom.
17249         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
17250         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
17251       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
17252                          DAG.getVectorIdxConstant(OrigElt, DL));
17253     }
17254   }
17255 
17256   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
17257   // simplify it based on the (valid) extraction indices.
17258   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
17259         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17260                Use->getOperand(0) == VecOp &&
17261                isa<ConstantSDNode>(Use->getOperand(1));
17262       })) {
17263     APInt DemandedElts = APInt::getNullValue(NumElts);
17264     for (SDNode *Use : VecOp->uses()) {
17265       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
17266       if (CstElt->getAPIntValue().ult(NumElts))
17267         DemandedElts.setBit(CstElt->getZExtValue());
17268     }
17269     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
17270       // We simplified the vector operand of this extract element. If this
17271       // extract is not dead, visit it again so it is folded properly.
17272       if (N->getOpcode() != ISD::DELETED_NODE)
17273         AddToWorklist(N);
17274       return SDValue(N, 0);
17275     }
17276   }
17277 
17278   // Everything under here is trying to match an extract of a loaded value.
17279   // If the result of load has to be truncated, then it's not necessarily
17280   // profitable.
17281   bool BCNumEltsChanged = false;
17282   EVT ExtVT = VecVT.getVectorElementType();
17283   EVT LVT = ExtVT;
17284   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
17285     return SDValue();
17286 
17287   if (VecOp.getOpcode() == ISD::BITCAST) {
17288     // Don't duplicate a load with other uses.
17289     if (!VecOp.hasOneUse())
17290       return SDValue();
17291 
17292     EVT BCVT = VecOp.getOperand(0).getValueType();
17293     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
17294       return SDValue();
17295     if (NumElts != BCVT.getVectorNumElements())
17296       BCNumEltsChanged = true;
17297     VecOp = VecOp.getOperand(0);
17298     ExtVT = BCVT.getVectorElementType();
17299   }
17300 
17301   // extract (vector load $addr), i --> load $addr + i * size
17302   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
17303       ISD::isNormalLoad(VecOp.getNode()) &&
17304       !Index->hasPredecessor(VecOp.getNode())) {
17305     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
17306     if (VecLoad && VecLoad->isSimple())
17307       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
17308   }
17309 
17310   // Perform only after legalization to ensure build_vector / vector_shuffle
17311   // optimizations have already been done.
17312   if (!LegalOperations || !IndexC)
17313     return SDValue();
17314 
17315   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
17316   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
17317   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
17318   int Elt = IndexC->getZExtValue();
17319   LoadSDNode *LN0 = nullptr;
17320   if (ISD::isNormalLoad(VecOp.getNode())) {
17321     LN0 = cast<LoadSDNode>(VecOp);
17322   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17323              VecOp.getOperand(0).getValueType() == ExtVT &&
17324              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
17325     // Don't duplicate a load with other uses.
17326     if (!VecOp.hasOneUse())
17327       return SDValue();
17328 
17329     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
17330   }
17331   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
17332     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
17333     // =>
17334     // (load $addr+1*size)
17335 
17336     // Don't duplicate a load with other uses.
17337     if (!VecOp.hasOneUse())
17338       return SDValue();
17339 
17340     // If the bit convert changed the number of elements, it is unsafe
17341     // to examine the mask.
17342     if (BCNumEltsChanged)
17343       return SDValue();
17344 
17345     // Select the input vector, guarding against out of range extract vector.
17346     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
17347     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
17348 
17349     if (VecOp.getOpcode() == ISD::BITCAST) {
17350       // Don't duplicate a load with other uses.
17351       if (!VecOp.hasOneUse())
17352         return SDValue();
17353 
17354       VecOp = VecOp.getOperand(0);
17355     }
17356     if (ISD::isNormalLoad(VecOp.getNode())) {
17357       LN0 = cast<LoadSDNode>(VecOp);
17358       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
17359       Index = DAG.getConstant(Elt, DL, Index.getValueType());
17360     }
17361   }
17362 
17363   // Make sure we found a non-volatile load and the extractelement is
17364   // the only use.
17365   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
17366     return SDValue();
17367 
17368   // If Idx was -1 above, Elt is going to be -1, so just return undef.
17369   if (Elt == -1)
17370     return DAG.getUNDEF(LVT);
17371 
17372   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
17373 }
17374 
17375 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
17376 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
17377   // We perform this optimization post type-legalization because
17378   // the type-legalizer often scalarizes integer-promoted vectors.
17379   // Performing this optimization before may create bit-casts which
17380   // will be type-legalized to complex code sequences.
17381   // We perform this optimization only before the operation legalizer because we
17382   // may introduce illegal operations.
17383   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
17384     return SDValue();
17385 
17386   unsigned NumInScalars = N->getNumOperands();
17387   SDLoc DL(N);
17388   EVT VT = N->getValueType(0);
17389 
17390   // Check to see if this is a BUILD_VECTOR of a bunch of values
17391   // which come from any_extend or zero_extend nodes. If so, we can create
17392   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
17393   // optimizations. We do not handle sign-extend because we can't fill the sign
17394   // using shuffles.
17395   EVT SourceType = MVT::Other;
17396   bool AllAnyExt = true;
17397 
17398   for (unsigned i = 0; i != NumInScalars; ++i) {
17399     SDValue In = N->getOperand(i);
17400     // Ignore undef inputs.
17401     if (In.isUndef()) continue;
17402 
17403     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
17404     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
17405 
17406     // Abort if the element is not an extension.
17407     if (!ZeroExt && !AnyExt) {
17408       SourceType = MVT::Other;
17409       break;
17410     }
17411 
17412     // The input is a ZeroExt or AnyExt. Check the original type.
17413     EVT InTy = In.getOperand(0).getValueType();
17414 
17415     // Check that all of the widened source types are the same.
17416     if (SourceType == MVT::Other)
17417       // First time.
17418       SourceType = InTy;
17419     else if (InTy != SourceType) {
17420       // Multiple income types. Abort.
17421       SourceType = MVT::Other;
17422       break;
17423     }
17424 
17425     // Check if all of the extends are ANY_EXTENDs.
17426     AllAnyExt &= AnyExt;
17427   }
17428 
17429   // In order to have valid types, all of the inputs must be extended from the
17430   // same source type and all of the inputs must be any or zero extend.
17431   // Scalar sizes must be a power of two.
17432   EVT OutScalarTy = VT.getScalarType();
17433   bool ValidTypes = SourceType != MVT::Other &&
17434                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
17435                  isPowerOf2_32(SourceType.getSizeInBits());
17436 
17437   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
17438   // turn into a single shuffle instruction.
17439   if (!ValidTypes)
17440     return SDValue();
17441 
17442   bool isLE = DAG.getDataLayout().isLittleEndian();
17443   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
17444   assert(ElemRatio > 1 && "Invalid element size ratio");
17445   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
17446                                DAG.getConstant(0, DL, SourceType);
17447 
17448   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
17449   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
17450 
17451   // Populate the new build_vector
17452   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17453     SDValue Cast = N->getOperand(i);
17454     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
17455             Cast.getOpcode() == ISD::ZERO_EXTEND ||
17456             Cast.isUndef()) && "Invalid cast opcode");
17457     SDValue In;
17458     if (Cast.isUndef())
17459       In = DAG.getUNDEF(SourceType);
17460     else
17461       In = Cast->getOperand(0);
17462     unsigned Index = isLE ? (i * ElemRatio) :
17463                             (i * ElemRatio + (ElemRatio - 1));
17464 
17465     assert(Index < Ops.size() && "Invalid index");
17466     Ops[Index] = In;
17467   }
17468 
17469   // The type of the new BUILD_VECTOR node.
17470   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
17471   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
17472          "Invalid vector size");
17473   // Check if the new vector type is legal.
17474   if (!isTypeLegal(VecVT) ||
17475       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
17476        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
17477     return SDValue();
17478 
17479   // Make the new BUILD_VECTOR.
17480   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
17481 
17482   // The new BUILD_VECTOR node has the potential to be further optimized.
17483   AddToWorklist(BV.getNode());
17484   // Bitcast to the desired type.
17485   return DAG.getBitcast(VT, BV);
17486 }
17487 
17488 // Simplify (build_vec (trunc $1)
17489 //                     (trunc (srl $1 half-width))
17490 //                     (trunc (srl $1 (2 * half-width))) …)
17491 // to (bitcast $1)
17492 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
17493   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
17494 
17495   // Only for little endian
17496   if (!DAG.getDataLayout().isLittleEndian())
17497     return SDValue();
17498 
17499   SDLoc DL(N);
17500   EVT VT = N->getValueType(0);
17501   EVT OutScalarTy = VT.getScalarType();
17502   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
17503 
17504   // Only for power of two types to be sure that bitcast works well
17505   if (!isPowerOf2_64(ScalarTypeBitsize))
17506     return SDValue();
17507 
17508   unsigned NumInScalars = N->getNumOperands();
17509 
17510   // Look through bitcasts
17511   auto PeekThroughBitcast = [](SDValue Op) {
17512     if (Op.getOpcode() == ISD::BITCAST)
17513       return Op.getOperand(0);
17514     return Op;
17515   };
17516 
17517   // The source value where all the parts are extracted.
17518   SDValue Src;
17519   for (unsigned i = 0; i != NumInScalars; ++i) {
17520     SDValue In = PeekThroughBitcast(N->getOperand(i));
17521     // Ignore undef inputs.
17522     if (In.isUndef()) continue;
17523 
17524     if (In.getOpcode() != ISD::TRUNCATE)
17525       return SDValue();
17526 
17527     In = PeekThroughBitcast(In.getOperand(0));
17528 
17529     if (In.getOpcode() != ISD::SRL) {
17530       // For now only build_vec without shuffling, handle shifts here in the
17531       // future.
17532       if (i != 0)
17533         return SDValue();
17534 
17535       Src = In;
17536     } else {
17537       // In is SRL
17538       SDValue part = PeekThroughBitcast(In.getOperand(0));
17539 
17540       if (!Src) {
17541         Src = part;
17542       } else if (Src != part) {
17543         // Vector parts do not stem from the same variable
17544         return SDValue();
17545       }
17546 
17547       SDValue ShiftAmtVal = In.getOperand(1);
17548       if (!isa<ConstantSDNode>(ShiftAmtVal))
17549         return SDValue();
17550 
17551       uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
17552 
17553       // The extracted value is not extracted at the right position
17554       if (ShiftAmt != i * ScalarTypeBitsize)
17555         return SDValue();
17556     }
17557   }
17558 
17559   // Only cast if the size is the same
17560   if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
17561     return SDValue();
17562 
17563   return DAG.getBitcast(VT, Src);
17564 }
17565 
/// Attempt to convert the BUILD_VECTOR node \p N, whose non-undef operands
/// are EXTRACT_VECTOR_ELTs taken from \p VecIn1 and (optionally) \p VecIn2,
/// into a single VECTOR_SHUFFLE.
/// \p VectorMask maps each result element to the number of the source vector
/// it comes from; \p LeftIdx is VecIn1's number in that numbering (VecIn2,
/// if present, is number LeftIdx + 1). \p DidSplitVec records that the
/// caller already split one wide input in two, in which case the extract
/// indices seen in N's operands are still relative to the original, unsplit
/// vector. Returns an empty SDValue if the input and output types cannot be
/// reconciled.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx, bool DidSplitVec) {
  SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  // With no second input, treat it as having the first input's type.
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // If we artificially split a vector in two already, then the offsets in the
  // operands will all be based off of VecIn1, even those in VecIn2.
  unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getVectorIdxConstant(NumElems, DL));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        // Shuffle at the wider width; the result is extracted back to VT
        // at the bottom of the function.
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      // VecIn2 is half as long as the output: widen it with undefs so both
      // shuffle operands have the output type.
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    // Skip undef (-1) and zero-vector (0) lanes; only real inputs get a
    // source lane in the mask.
    if (VectorMask[i] <= 0)
      continue;

    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}
17680 
17681 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
17682   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
17683 
17684   // First, determine where the build vector is not undef.
17685   // TODO: We could extend this to handle zero elements as well as undefs.
17686   int NumBVOps = BV->getNumOperands();
17687   int ZextElt = -1;
17688   for (int i = 0; i != NumBVOps; ++i) {
17689     SDValue Op = BV->getOperand(i);
17690     if (Op.isUndef())
17691       continue;
17692     if (ZextElt == -1)
17693       ZextElt = i;
17694     else
17695       return SDValue();
17696   }
17697   // Bail out if there's no non-undef element.
17698   if (ZextElt == -1)
17699     return SDValue();
17700 
17701   // The build vector contains some number of undef elements and exactly
17702   // one other element. That other element must be a zero-extended scalar
17703   // extracted from a vector at a constant index to turn this into a shuffle.
17704   // Also, require that the build vector does not implicitly truncate/extend
17705   // its elements.
17706   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
17707   EVT VT = BV->getValueType(0);
17708   SDValue Zext = BV->getOperand(ZextElt);
17709   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
17710       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
17711       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
17712       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
17713     return SDValue();
17714 
17715   // The zero-extend must be a multiple of the source size, and we must be
17716   // building a vector of the same size as the source of the extract element.
17717   SDValue Extract = Zext.getOperand(0);
17718   unsigned DestSize = Zext.getValueSizeInBits();
17719   unsigned SrcSize = Extract.getValueSizeInBits();
17720   if (DestSize % SrcSize != 0 ||
17721       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
17722     return SDValue();
17723 
17724   // Create a shuffle mask that will combine the extracted element with zeros
17725   // and undefs.
17726   int ZextRatio = DestSize / SrcSize;
17727   int NumMaskElts = NumBVOps * ZextRatio;
17728   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
17729   for (int i = 0; i != NumMaskElts; ++i) {
17730     if (i / ZextRatio == ZextElt) {
17731       // The low bits of the (potentially translated) extracted element map to
17732       // the source vector. The high bits map to zero. We will use a zero vector
17733       // as the 2nd source operand of the shuffle, so use the 1st element of
17734       // that vector (mask value is number-of-elements) for the high bits.
17735       if (i % ZextRatio == 0)
17736         ShufMask[i] = Extract.getConstantOperandVal(1);
17737       else
17738         ShufMask[i] = NumMaskElts;
17739     }
17740 
17741     // Undef elements of the build vector remain undef because we initialize
17742     // the shuffle mask with -1.
17743   }
17744 
17745   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
17746   // bitcast (shuffle V, ZeroVec, VectorMask)
17747   SDLoc DL(BV);
17748   EVT VecVT = Extract.getOperand(0).getValueType();
17749   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
17750   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17751   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
17752                                              ZeroVec, ShufMask, DAG);
17753   if (!Shuf)
17754     return SDValue();
17755   return DAG.getBitcast(VT, Shuf);
17756 }
17757 
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node. Returns an empty SDValue if any
// operand is not an undef, a zero, or an in-range constant-index extract,
// or if the source types cannot be reconciled.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // Special case: a single zero-extended live element can become a shuffle
  // with a zero vector.
  if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
    return V;

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.

  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  // Slot 0 is reserved for the zero vector; real input vectors get
  // numbers starting at 1.
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    SDValue ExtractedFromVec = Op.getOperand(0);

    const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
      return SDValue();

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    unsigned Idx = std::distance(
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
    if (Idx == VecIn.size())
      VecIn.push_back(ExtractedFromVec);

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  // (Remember slot 0 is the reserved zero-vector entry.)
  if (VecIn.size() < 2)
    return SDValue();

  // If all the Operands of BUILD_VECTOR extract from same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
  bool DidSplitVec = false;
  if (VecIn.size() == 2) {
    // Exactly one real input vector (plus the reserved zero slot).
    unsigned MaxIndex = 0;
    unsigned NearestPow2 = 0;
    SDValue Vec = VecIn.back();
    EVT InVT = Vec.getValueType();
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);

    for (unsigned i = 0; i < NumElems; i++) {
      if (VectorMask[i] <= 0)
        continue;
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
      IndexVec[i] = Index;
      MaxIndex = std::max(MaxIndex, Index);
    }

    // Split only when the accessed range fits in half of the
    // power-of-2-rounded source and the halves are wider than the result.
    NearestPow2 = PowerOf2Ceil(MaxIndex);
    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
        NumElems * 2 < NearestPow2) {
      unsigned SplitSize = NearestPow2 / 2;
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
                                     InVT.getVectorElementType(), SplitSize);
      if (TLI.isTypeLegal(SplitVT)) {
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getVectorIdxConstant(SplitSize, DL));
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getVectorIdxConstant(0, DL));
        // Replace the original vector with its two halves (numbers 1 and 2).
        VecIn.pop_back();
        VecIn.push_back(VecIn1);
        VecIn.push_back(VecIn2);
        DidSplitVec = true;

        // Re-point each element at the half its index falls into.
        for (unsigned i = 0; i < NumElems; i++) {
          if (VectorMask[i] <= 0)
            continue;
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
        }
      }
    }
  }

  // TODO: We want to sort the vectors by descending length, so that adjacent
  // pairs have similar length, and the longer vector is always first in the
  // pair.

  // TODO: Should this fire if some of the input vectors has illegal type (like
  // it does now), or should we let legalization run its course first?

  // Shuffle phase:
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    // Vector numbers start at 1, so pair (2*In+1, 2*In+2).
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx, DidSplitVec))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      // The zero vector, if used, is the last entry in Shuffles.
      Vec = Shuffles.size() - 1;
    else
      // Input vectors 2k+1 and 2k+2 were blended into shuffle k.
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    if (CurSize % 2) {
      // Pad an odd level with undef. NOTE: the vector is never shrunk, so
      // index CurSize is within the (even) initial size and this write only
      // overwrites a stale entry from a previous level.
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      // Blend: elements of the left child keep their position, elements of
      // the right child are addressed past NumElems.
      for (unsigned i = 0; i != NumElems; ++i) {
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }
  return Shuffles[0];
}
17970 
17971 // Try to turn a build vector of zero extends of extract vector elts into a
17972 // a vector zero extend and possibly an extract subvector.
17973 // TODO: Support sign extend?
17974 // TODO: Allow undef elements?
17975 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
17976   if (LegalOperations)
17977     return SDValue();
17978 
17979   EVT VT = N->getValueType(0);
17980 
17981   bool FoundZeroExtend = false;
17982   SDValue Op0 = N->getOperand(0);
17983   auto checkElem = [&](SDValue Op) -> int64_t {
17984     unsigned Opc = Op.getOpcode();
17985     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
17986     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
17987         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17988         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
17989       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
17990         return C->getZExtValue();
17991     return -1;
17992   };
17993 
17994   // Make sure the first element matches
17995   // (zext (extract_vector_elt X, C))
17996   int64_t Offset = checkElem(Op0);
17997   if (Offset < 0)
17998     return SDValue();
17999 
18000   unsigned NumElems = N->getNumOperands();
18001   SDValue In = Op0.getOperand(0).getOperand(0);
18002   EVT InSVT = In.getValueType().getScalarType();
18003   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
18004 
18005   // Don't create an illegal input type after type legalization.
18006   if (LegalTypes && !TLI.isTypeLegal(InVT))
18007     return SDValue();
18008 
18009   // Ensure all the elements come from the same vector and are adjacent.
18010   for (unsigned i = 1; i != NumElems; ++i) {
18011     if ((Offset + i) != checkElem(N->getOperand(i)))
18012       return SDValue();
18013   }
18014 
18015   SDLoc DL(N);
18016   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
18017                    Op0.getOperand(0).getOperand(1));
18018   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
18019                      VT, In);
18020 }
18021 
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // If this is a splat of a bitcast from another vector, change to a
  // concat_vector.
  // For example:
  //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
  //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
  //
  // If X is a build_vector itself, the concat can become a larger build_vector.
  // TODO: Maybe this is useful for non-splat too?
  if (!LegalOperations) {
    if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
      Splat = peekThroughBitcasts(Splat);
      EVT SrcVT = Splat.getValueType();
      if (SrcVT.isVector()) {
        // The concat keeps the total element count:
        // (num build_vector operands) x (elements per splatted source vector).
        unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
        EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
                                     SrcVT.getVectorElementType(), NumElts);
        if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
          SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
          SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
                                       NewVT, Ops);
          return DAG.getBitcast(VT, Concat);
        }
      }
    }
  }

  // A splat of a single element is a SPLAT_VECTOR if supported on the target.
  if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
    if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
      assert(!V.isUndef() && "Splat of undef should have been handled earlier");
      return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
    }

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    // Map (extract_vector_elt Src, C) to C when Src is the same vector the
    // first operand extracts from; anything else maps to -1.
    // NOTE(review): the lambda returns uint64_t while `Offset` below is int;
    // the -1 failure value relies on the mixed-width comparison in the loop
    // always mismatching. Fragile but apparently intentional — confirm before
    // changing any of these types.
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    // All operands must be consecutive extracts starting at index Offset.
    int Offset = checkElem(Op0);
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    // Extracting the full width from index 0 is just the source vector.
    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  // Generic build_vector combines, tried in order; first success wins.
  if (SDValue V = convertBuildVecZextToZext(N))
    return V;

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecTruncToBitCast(N))
    return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  return SDValue();
}
18105 
// Fold a CONCAT_VECTORS whose operands are all bitcasts of scalars (or undef)
// into a single BUILD_VECTOR of those scalars, bitcast to the result type.
// Only fires when the operand vector type is illegal (legal operands are left
// for other combines).
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT OpVT = N->getOperand(0).getValueType();

  // If the operands are legal vectors, leave them alone.
  if (TLI.isTypeLegal(OpVT))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SmallVector<SDValue, 8> Ops;

  // Start with an integer scalar type of the operand's width; this may be
  // switched to a floating point type below if any FP scalar is seen.
  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);

  // Keep track of what we encounter.
  bool AnyInteger = false;
  bool AnyFP = false;
  for (const SDValue &Op : N->ops()) {
    if (ISD::BITCAST == Op.getOpcode() &&
        !Op.getOperand(0).getValueType().isVector())
      Ops.push_back(Op.getOperand(0));
    else if (ISD::UNDEF == Op.getOpcode())
      Ops.push_back(ScalarUndef);
    else
      return SDValue();

    // Note whether we encounter an integer or floating point scalar.
    // If it's neither, bail out, it could be something weird like x86mmx.
    EVT LastOpVT = Ops.back().getValueType();
    if (LastOpVT.isFloatingPoint())
      AnyFP = true;
    else if (LastOpVT.isInteger())
      AnyInteger = true;
    else
      return SDValue();
  }

  // If any of the operands is a floating point scalar bitcast to a vector,
  // use floating point types throughout, and bitcast everything.
  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
  if (AnyFP) {
    SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
    ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
    if (AnyInteger) {
      // Mixed int/FP scalars: rewrite the integer ones (and undefs) as FP of
      // the same width so the build_vector has a single element type.
      for (SDValue &Op : Ops) {
        if (Op.getValueType() == SVT)
          continue;
        if (Op.isUndef())
          Op = ScalarUndef;
        else
          Op = DAG.getBitcast(SVT, Op);
      }
    }
  }

  // Build the scalar vector and bitcast it back to the concat's result type.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
                               VT.getSizeInBits() / SVT.getSizeInBits());
  return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}
18166 
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 are the (at most two) distinct source vectors of the shuffle;
  // Mask accumulates NumOpElts entries per concat operand.
  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    Op = peekThroughBitcasts(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();
    ExtVec = peekThroughBitcasts(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = Op.getConstantOperandVal(1);

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    // Mask values for the second input are offset by NumElts per the
    // VECTOR_SHUFFLE convention.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  // Let the target decide whether this mask is legal (possibly commuted).
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                                     DAG.getBitcast(VT, SV1), Mask, DAG);
}
18242 
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // If the input is a concat_vectors, just make a larger concat by padding
    // with smaller undefs.
    if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
      unsigned NumOps = N->getNumOperands() * In.getNumOperands();
      SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
      Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
    }

    SDValue Scalar = peekThroughOneUseBitcasts(In);

    // concat_vectors(scalar_to_vector(scalar), undef) ->
    //     scalar_to_vector(scalar)
    if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
         Scalar.hasOneUse()) {
      EVT SVT = Scalar.getValueType().getVectorElementType();
      if (SVT == Scalar.getOperand(0).getValueType())
        Scalar = Scalar.getOperand(0);
    }

    // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
    // NOTE(review): the SDValue() returns inside this branch bail out of the
    // entire visit (skipping the combines further below), not just this
    // transform — presumably intentional; confirm before restructuring.
    if (!Scalar.getValueType().isVector()) {
      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar.getValueType();

      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Bail out if the vector size is not a multiple of the scalar size.
      if (VT.getSizeInBits() % SclTy.getSizeInBits())
        return SDValue();

      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0).getValueType();
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      // An undef operand contributes NumElts undef scalars.
      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          // Integer scalars may be wider than MinVT; truncate to unify types.
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    unsigned IdentityIndex = i * PartNumElem;
    if (CS->getAPIntValue() != IdentityIndex)
      return SDValue();
  }

  // Every defined operand was an identity extract from SingleSource, so the
  // concat reproduces SingleSource exactly.
  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
18417 
18418 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
18419 // if the subvector can be sourced for free.
18420 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
18421   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
18422       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
18423     return V.getOperand(1);
18424   }
18425   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18426   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
18427       V.getOperand(0).getValueType() == SubVT &&
18428       (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
18429     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
18430     return V.getOperand(SubIdx);
18431   }
18432   return SDValue();
18433 }
18434 
18435 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
18436                                               SelectionDAG &DAG) {
18437   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18438   SDValue BinOp = Extract->getOperand(0);
18439   unsigned BinOpcode = BinOp.getOpcode();
18440   if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
18441     return SDValue();
18442 
18443   EVT VecVT = BinOp.getValueType();
18444   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
18445   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
18446     return SDValue();
18447 
18448   SDValue Index = Extract->getOperand(1);
18449   EVT SubVT = Extract->getValueType(0);
18450   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
18451     return SDValue();
18452 
18453   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
18454   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
18455 
18456   // TODO: We could handle the case where only 1 operand is being inserted by
18457   //       creating an extract of the other operand, but that requires checking
18458   //       number of uses and/or costs.
18459   if (!Sub0 || !Sub1)
18460     return SDValue();
18461 
18462   // We are inserting both operands of the wide binop only to extract back
18463   // to the narrow vector size. Eliminate all of the insert/extract:
18464   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
18465   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
18466                      BinOp->getFlags());
18467 }
18468 
/// If we are extracting a subvector produced by a wide binary operator try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
  // some of these bailouts with other transforms.

  if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
    return V;

  // The extract index must be a constant, so we can map it to a concat operand.
  auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!ExtractIndexC)
    return SDValue();

  // We are looking for an optionally bitcasted wide vector binary operator
  // feeding an extract subvector.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
  unsigned BOpcode = BinOp.getOpcode();
  if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
    return SDValue();

  // The binop must be a vector type, so we can extract some fraction of it.
  EVT WideBVT = BinOp.getValueType();
  if (!WideBVT.isVector())
    return SDValue();

  EVT VT = Extract->getValueType(0);
  unsigned ExtractIndex = ExtractIndexC->getZExtValue();
  assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
         "Extract index is not a multiple of the vector length.");

  // Bail out if this is not a proper multiple width extraction.
  unsigned WideWidth = WideBVT.getSizeInBits();
  unsigned NarrowWidth = VT.getSizeInBits();
  if (WideWidth % NarrowWidth != 0)
    return SDValue();

  // Bail out if we are extracting a fraction of a single operation. This can
  // occur because we potentially looked through a bitcast of the binop.
  unsigned NarrowingRatio = WideWidth / NarrowWidth;
  unsigned WideNumElts = WideBVT.getVectorNumElements();
  if (WideNumElts % NarrowingRatio != 0)
    return SDValue();

  // Bail out if the target does not support a narrower version of the binop.
  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
                                   WideNumElts / NarrowingRatio);
  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
    return SDValue();

  // If extraction is cheap, we don't need to look at the binop operands
  // for concat ops. The narrow binop alone makes this transform profitable.
  // We can't just reuse the original extract index operand because we may have
  // bitcasted.
  // ConcatOpNum: which NarrowBVT-sized chunk of the wide binop we want;
  // ExtBOIdx: the corresponding element index in WideBVT terms.
  unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
  if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
      BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
    // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
    SDLoc DL(Extract);
    SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
    SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                            BinOp.getOperand(0), NewExtIndex);
    SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                            BinOp.getOperand(1), NewExtIndex);
    SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
                                      BinOp.getNode()->getFlags());
    return DAG.getBitcast(VT, NarrowBinOp);
  }

  // Only handle the case where we are doubling and then halving. A larger ratio
  // may require more than two narrow binops to replace the wide binop.
  if (NarrowingRatio != 2)
    return SDValue();

  // TODO: The motivating case for this transform is an x86 AVX1 target. That
  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
  // flavors, but no other 256-bit integer support. This could be extended to
  // handle any binop, but that may require fixing/adding other folds to avoid
  // codegen regressions.
  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
    return SDValue();

  // We need at least one concatenation operation of a binop operand to make
  // this transform worthwhile. The concat must double the input vector sizes.
  auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
    if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
      return V.getOperand(ConcatOpNum);
    return SDValue();
  };
  SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
  SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));

  if (SubVecL || SubVecR) {
    // If a binop operand was not the result of a concat, we must extract a
    // half-sized operand for our new narrow binop:
    // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
    // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
    // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
    SDLoc DL(Extract);
    SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
    SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
                        : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                      BinOp.getOperand(0), IndexC);

    SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
                        : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                      BinOp.getOperand(1), IndexC);

    SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
    return DAG.getBitcast(VT, NarrowBinOp);
  }

  return SDValue();
}
18585 
18586 /// If we are extracting a subvector from a wide vector load, convert to a
18587 /// narrow load to eliminate the extraction:
18588 /// (extract_subvector (load wide vector)) --> (load narrow vector)
18589 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
18590   // TODO: Add support for big-endian. The offset calculation must be adjusted.
18591   if (DAG.getDataLayout().isBigEndian())
18592     return SDValue();
18593 
18594   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
18595   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
18596   if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
18597       !ExtIdx)
18598     return SDValue();
18599 
18600   // Allow targets to opt-out.
18601   EVT VT = Extract->getValueType(0);
18602   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18603   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
18604     return SDValue();
18605 
18606   // The narrow load will be offset from the base address of the old load if
18607   // we are extracting from something besides index 0 (little-endian).
18608   SDLoc DL(Extract);
18609   SDValue BaseAddr = Ld->getOperand(1);
18610   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
18611 
18612   // TODO: Use "BaseIndexOffset" to make this more effective.
18613   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
18614   MachineFunction &MF = DAG.getMachineFunction();
18615   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
18616                                                    VT.getStoreSize());
18617   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
18618   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
18619   return NewLd;
18620 }
18621 
18622 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
18623   EVT NVT = N->getValueType(0);
18624   SDValue V = N->getOperand(0);
18625 
18626   // Extract from UNDEF is UNDEF.
18627   if (V.isUndef())
18628     return DAG.getUNDEF(NVT);
18629 
18630   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
18631     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
18632       return NarrowLoad;
18633 
18634   // Combine an extract of an extract into a single extract_subvector.
18635   // ext (ext X, C), 0 --> ext X, C
18636   SDValue Index = N->getOperand(1);
18637   if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18638       V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
18639     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
18640                                     V.getConstantOperandVal(1)) &&
18641         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
18642       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
18643                          V.getOperand(1));
18644     }
18645   }
18646 
18647   // Try to move vector bitcast after extract_subv by scaling extraction index:
18648   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
18649   if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
18650       V.getOperand(0).getValueType().isVector()) {
18651     SDValue SrcOp = V.getOperand(0);
18652     EVT SrcVT = SrcOp.getValueType();
18653     unsigned SrcNumElts = SrcVT.getVectorNumElements();
18654     unsigned DestNumElts = V.getValueType().getVectorNumElements();
18655     if ((SrcNumElts % DestNumElts) == 0) {
18656       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
18657       unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
18658       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
18659                                       NewExtNumElts);
18660       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
18661         unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
18662         SDLoc DL(N);
18663         SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
18664         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
18665                                          V.getOperand(0), NewIndex);
18666         return DAG.getBitcast(NVT, NewExtract);
18667       }
18668     }
18669     if ((DestNumElts % SrcNumElts) == 0) {
18670       unsigned DestSrcRatio = DestNumElts / SrcNumElts;
18671       if ((NVT.getVectorNumElements() % DestSrcRatio) == 0) {
18672         unsigned NewExtNumElts = NVT.getVectorNumElements() / DestSrcRatio;
18673         EVT ScalarVT = SrcVT.getScalarType();
18674         if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0) {
18675           SDLoc DL(N);
18676           unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio;
18677           EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(),
18678                                           ScalarVT, NewExtNumElts);
18679           if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
18680             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
18681             SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
18682                                              V.getOperand(0), NewIndex);
18683             return DAG.getBitcast(NVT, NewExtract);
18684           }
18685           if (NewExtNumElts == 1 &&
18686               TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
18687             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
18688             SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
18689                                              V.getOperand(0), NewIndex);
18690             return DAG.getBitcast(NVT, NewExtract);
18691           }
18692         }
18693       }
18694     }
18695   }
18696 
18697   if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index)) {
18698     EVT ConcatSrcVT = V.getOperand(0).getValueType();
18699     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
18700            "Concat and extract subvector do not change element type");
18701 
18702     unsigned ExtIdx = N->getConstantOperandVal(1);
18703     unsigned ExtNumElts = NVT.getVectorNumElements();
18704     assert(ExtIdx % ExtNumElts == 0 &&
18705            "Extract index is not a multiple of the input vector length.");
18706 
18707     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorNumElements();
18708     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
18709 
18710     // If the concatenated source types match this extract, it's a direct
18711     // simplification:
18712     // extract_subvec (concat V1, V2, ...), i --> Vi
18713     if (ConcatSrcNumElts == ExtNumElts)
18714       return V.getOperand(ConcatOpIdx);
18715 
18716     // If the concatenated source vectors are a multiple length of this extract,
18717     // then extract a fraction of one of those source vectors directly from a
18718     // concat operand. Example:
18719     //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
18720     //   v2i8 extract_subvec v8i8 Y, 6
18721     if (ConcatSrcNumElts % ExtNumElts == 0) {
18722       SDLoc DL(N);
18723       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
18724       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
18725              "Trying to extract from >1 concat operand?");
18726       assert(NewExtIdx % ExtNumElts == 0 &&
18727              "Extract index is not a multiple of the input vector length.");
18728       SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
18729       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
18730                          V.getOperand(ConcatOpIdx), NewIndexC);
18731     }
18732   }
18733 
18734   V = peekThroughBitcasts(V);
18735 
18736   // If the input is a build vector. Try to make a smaller build vector.
18737   if (V.getOpcode() == ISD::BUILD_VECTOR) {
18738     if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
18739       EVT InVT = V.getValueType();
18740       unsigned ExtractSize = NVT.getSizeInBits();
18741       unsigned EltSize = InVT.getScalarSizeInBits();
18742       // Only do this if we won't split any elements.
18743       if (ExtractSize % EltSize == 0) {
18744         unsigned NumElems = ExtractSize / EltSize;
18745         EVT EltVT = InVT.getVectorElementType();
18746         EVT ExtractVT = NumElems == 1 ? EltVT
18747                                       : EVT::getVectorVT(*DAG.getContext(),
18748                                                          EltVT, NumElems);
18749         if ((Level < AfterLegalizeDAG ||
18750              (NumElems == 1 ||
18751               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
18752             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
18753           unsigned IdxVal = IdxC->getZExtValue();
18754           IdxVal *= NVT.getScalarSizeInBits();
18755           IdxVal /= EltSize;
18756 
18757           if (NumElems == 1) {
18758             SDValue Src = V->getOperand(IdxVal);
18759             if (EltVT != Src.getValueType())
18760               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
18761             return DAG.getBitcast(NVT, Src);
18762           }
18763 
18764           // Extract the pieces from the original build_vector.
18765           SDValue BuildVec = DAG.getBuildVector(
18766               ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
18767           return DAG.getBitcast(NVT, BuildVec);
18768         }
18769       }
18770     }
18771   }
18772 
18773   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
18774     // Handle only simple case where vector being inserted and vector
18775     // being extracted are of same size.
18776     EVT SmallVT = V.getOperand(1).getValueType();
18777     if (!NVT.bitsEq(SmallVT))
18778       return SDValue();
18779 
18780     // Only handle cases where both indexes are constants.
18781     auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
18782     auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
18783     if (InsIdx && ExtIdx) {
18784       // Combine:
18785       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
18786       // Into:
18787       //    indices are equal or bit offsets are equal => V1
18788       //    otherwise => (extract_subvec V1, ExtIdx)
18789       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
18790           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
18791         return DAG.getBitcast(NVT, V.getOperand(1));
18792       return DAG.getNode(
18793           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
18794           DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
18795           Index);
18796     }
18797   }
18798 
18799   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
18800     return NarrowBOp;
18801 
18802   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18803     return SDValue(N, 0);
18804 
18805   return SDValue();
18806 }
18807 
18808 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
18809 /// followed by concatenation. Narrow vector ops may have better performance
18810 /// than wide ops, and this can unlock further narrowing of other vector ops.
18811 /// Targets can invert this transform later if it is not profitable.
18812 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
18813                                          SelectionDAG &DAG) {
18814   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
18815   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
18816       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
18817       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
18818     return SDValue();
18819 
18820   // Split the wide shuffle mask into halves. Any mask element that is accessing
18821   // operand 1 is offset down to account for narrowing of the vectors.
18822   ArrayRef<int> Mask = Shuf->getMask();
18823   EVT VT = Shuf->getValueType(0);
18824   unsigned NumElts = VT.getVectorNumElements();
18825   unsigned HalfNumElts = NumElts / 2;
18826   SmallVector<int, 16> Mask0(HalfNumElts, -1);
18827   SmallVector<int, 16> Mask1(HalfNumElts, -1);
18828   for (unsigned i = 0; i != NumElts; ++i) {
18829     if (Mask[i] == -1)
18830       continue;
18831     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
18832     if (i < HalfNumElts)
18833       Mask0[i] = M;
18834     else
18835       Mask1[i - HalfNumElts] = M;
18836   }
18837 
18838   // Ask the target if this is a valid transform.
18839   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18840   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
18841                                 HalfNumElts);
18842   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
18843       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
18844     return SDValue();
18845 
18846   // shuffle (concat X, undef), (concat Y, undef), Mask -->
18847   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
18848   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
18849   SDLoc DL(Shuf);
18850   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
18851   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
18852   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
18853 }
18854 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into simpler shuffle then concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
  ArrayRef<int> Mask = SVN->getMask();

  SmallVector<SDValue, 4> Ops;
  // NOTE(review): the visible caller only invokes this when N0 is a
  // CONCAT_VECTORS and N1 is undef or a concat with a matching operand type,
  // so all concat operands share ConcatVT.
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  auto IsUndefMaskElt = [](int i) { return i == -1; };

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
                   IsUndefMaskElt)) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
                              N0.getOperand(1),
                              Mask.slice(0, NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector
  for (unsigned I = 0; I != NumConcats; ++I) {
    unsigned Begin = I * NumElemsPerConcat;
    ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);

    // Make sure we're dealing with a copy.
    if (llvm::all_of(SubMask, IsUndefMaskElt)) {
      Ops.push_back(DAG.getUNDEF(ConcatVT));
      continue;
    }

    // Find the single concat operand this chunk copies from; every defined
    // mask element must stay in its lane within that operand.
    int OpIdx = -1;
    for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
      if (IsUndefMaskElt(SubMask[i]))
        continue;
      // The element must not change lanes inside the copied subvector.
      if ((SubMask[i] % (int)NumElemsPerConcat) != i)
        return SDValue();
      int EltOpIdx = SubMask[i] / NumElemsPerConcat;
      // All defined elements of this chunk must reference the same operand.
      if (0 <= OpIdx && EltOpIdx != OpIdx)
        return SDValue();
      OpIdx = EltOpIdx;
    }
    assert(0 <= OpIdx && "Unknown concat_vectors op");

    // Concat-operand indices cover N0's operands first, then N1's.
    if (OpIdx < (int)N0.getNumOperands())
      Ops.push_back(N0.getOperand(OpIdx));
    else
      Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
18919 
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
//
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
// a simplification in some sense, but it isn't appropriate in general: some
// BUILD_VECTORs are substantially cheaper than others. The general case
// of a BUILD_VECTOR requires inserting each element individually (or
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
// all constants is a single constant pool load.  A BUILD_VECTOR where each
// element is identical is a splat.  A BUILD_VECTOR where most of the operands
// are undef lowers to a small number of element insertions.
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  // Only fold when this shuffle is the sole user of its first operand.
  if (!N0->hasOneUse())
    return SDValue();

  // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    if (!N1->hasOneUse())
      return SDValue();

    bool N0AnyConst = isAnyConstantBuildVector(N0);
    bool N1AnyConst = isAnyConstantBuildVector(N1);
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // If both inputs are splats of the same value then we can safely merge this
  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
  bool IsSplat = false;
  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
  if (BV0 && BV1)
    if (SDValue Splat0 = BV0->getSplatValue())
      IsSplat = (Splat0 == BV1->getSplatValue());

  // Collect the scalar operand chosen for each output lane (undef for
  // unreferenced lanes).
  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      // Mask values below NumElts select from N0, the rest select from N1.
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        // SCALAR_TO_VECTOR only defines lane 0; any other lane is undef.
        SDValue Op0 = S.getOperand(0);
        Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
    // generating a splat; semantically, this is fine, but it's likely to
    // generate low-quality code if the target can't reconstruct an appropriate
    // shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!IsSplat && !DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }

  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  // Widen every operand to the common scalar type, zero-extending when the
  // target says that is free, otherwise sign-extending.
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
19012 
19013 // Match shuffles that can be converted to any_vector_extend_in_reg.
19014 // This is often generated during legalization.
19015 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
19016 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
19017 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
19018                                             SelectionDAG &DAG,
19019                                             const TargetLowering &TLI,
19020                                             bool LegalOperations) {
19021   EVT VT = SVN->getValueType(0);
19022   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
19023 
19024   // TODO Add support for big-endian when we have a test case.
19025   if (!VT.isInteger() || IsBigEndian)
19026     return SDValue();
19027 
19028   unsigned NumElts = VT.getVectorNumElements();
19029   unsigned EltSizeInBits = VT.getScalarSizeInBits();
19030   ArrayRef<int> Mask = SVN->getMask();
19031   SDValue N0 = SVN->getOperand(0);
19032 
19033   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
19034   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
19035     for (unsigned i = 0; i != NumElts; ++i) {
19036       if (Mask[i] < 0)
19037         continue;
19038       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
19039         continue;
19040       return false;
19041     }
19042     return true;
19043   };
19044 
19045   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
19046   // power-of-2 extensions as they are the most likely.
19047   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
19048     // Check for non power of 2 vector sizes
19049     if (NumElts % Scale != 0)
19050       continue;
19051     if (!isAnyExtend(Scale))
19052       continue;
19053 
19054     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
19055     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
19056     // Never create an illegal type. Only create unsupported operations if we
19057     // are pre-legalization.
19058     if (TLI.isTypeLegal(OutVT))
19059       if (!LegalOperations ||
19060           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
19061         return DAG.getBitcast(VT,
19062                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
19063                                           SDLoc(SVN), OutVT, N0));
19064   }
19065 
19066   return SDValue();
19067 }
19068 
// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
// each source element of a large type into the lowest elements of a smaller
// destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// then be able to remove it.
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
                                        SelectionDAG &DAG) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  // Look through bitcasts to find an extend_vector_inreg source.
  SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));

  unsigned Opcode = N0.getOpcode();
  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
      Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
      Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  ArrayRef<int> Mask = SVN->getMask();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();

  // The extension must have widened elements by an integral factor.
  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
    return SDValue();
  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;

  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      // Undef lanes match anything.
      if (Mask[i] < 0)
        continue;
      // Each defined lane must pick the low part of the i'th wide element.
      if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
        continue;
      return false;
    }
    return true;
  };

  // At the moment we just handle the case where we've truncated back to the
  // same size as before the extension.
  // TODO: handle more extension/truncation cases as cases arise.
  if (EltSizeInBits != ExtSrcSizeInBits)
    return SDValue();

  // We can remove *extend_vector_inreg only if the truncation happens at
  // the same scale as the extension.
  if (isTruncate(ExtScale))
    return DAG.getBitcast(VT, N00);

  return SDValue();
}
19129 
// Combine shuffles of splat-shuffles of the form:
// shuffle (shuffle V, undef, splat-mask), undef, M
// If splat-mask contains undef elements, we need to be careful about
// introducing undef's in the folded mask which are not the result of composing
// the masks of the shuffles.
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
                                        SelectionDAG &DAG) {
  // Only handle unary shuffles of a splat-shuffle.
  if (!Shuf->getOperand(1).isUndef())
    return SDValue();
  auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
  if (!Splat || !Splat->isSplat())
    return SDValue();

  ArrayRef<int> ShufMask = Shuf->getMask();
  ArrayRef<int> SplatMask = Splat->getMask();
  assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");

  // Prefer simplifying to the splat-shuffle, if possible. This is legal if
  // every undef mask element in the splat-shuffle has a corresponding undef
  // element in the user-shuffle's mask or if the composition of mask elements
  // would result in undef.
  // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
  // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
  //   In this case it is not legal to simplify to the splat-shuffle because we
  //   may be exposing the users of the shuffle an undef element at index 1
  //   which was not there before the combine.
  // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
  //   In this case the composition of masks yields SplatMask, so it's ok to
  //   simplify to the splat-shuffle.
  // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
  //   In this case the composed mask includes all undef elements of SplatMask
  //   and in addition sets element zero to undef. It is safe to simplify to
  //   the splat-shuffle.
  auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
                                       ArrayRef<int> SplatMask) {
    // Reject only lanes where the user demands a value (UserMask[i] != -1)
    // from a lane the splat leaves undef (SplatMask[i] == -1) while the
    // composed mask would have been defined (SplatMask[UserMask[i]] != -1).
    for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
      if (UserMask[i] != -1 && SplatMask[i] == -1 &&
          SplatMask[UserMask[i]] != -1)
        return false;
    return true;
  };
  if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
    return Shuf->getOperand(0);

  // Create a new shuffle with a mask that is composed of the two shuffles'
  // masks.
  SmallVector<int, 32> NewMask;
  for (int Idx : ShufMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);

  return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
                              Splat->getOperand(0), Splat->getOperand(1),
                              NewMask);
}
19184 
19185 /// If the shuffle mask is taking exactly one element from the first vector
19186 /// operand and passing through all other elements from the second vector
19187 /// operand, return the index of the mask element that is choosing an element
19188 /// from the first operand. Otherwise, return -1.
19189 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
19190   int MaskSize = Mask.size();
19191   int EltFromOp0 = -1;
19192   // TODO: This does not match if there are undef elements in the shuffle mask.
19193   // Should we ignore undefs in the shuffle mask instead? The trade-off is
19194   // removing an instruction (a shuffle), but losing the knowledge that some
19195   // vector lanes are not needed.
19196   for (int i = 0; i != MaskSize; ++i) {
19197     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
19198       // We're looking for a shuffle of exactly one element from operand 0.
19199       if (EltFromOp0 != -1)
19200         return -1;
19201       EltFromOp0 = i;
19202     } else if (Mask[i] != i + MaskSize) {
19203       // Nothing from operand 1 can change lanes.
19204       return -1;
19205     }
19206   }
19207   return EltFromOp0;
19208 }
19209 
/// If a shuffle inserts exactly one element from a source vector operand into
/// another vector operand and we can access the specified element as a scalar,
/// then we can eliminate the shuffle.
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
                                      SelectionDAG &DAG) {
  // First, check if we are taking one element of a vector and shuffling that
  // element into another vector.
  ArrayRef<int> Mask = Shuf->getMask();
  SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
  SDValue Op0 = Shuf->getOperand(0);
  SDValue Op1 = Shuf->getOperand(1);
  int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
  if (ShufOp0Index == -1) {
    // Commute mask and check again.
    ShuffleVectorSDNode::commuteMask(CommutedMask);
    ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
    if (ShufOp0Index == -1)
      return SDValue();
    // Commute operands to match the commuted shuffle mask.
    std::swap(Op0, Op1);
    Mask = CommutedMask;
  }

  // The shuffle inserts exactly one element from operand 0 into operand 1.
  // Now see if we can access that element as a scalar via a real insert element
  // instruction.
  // TODO: We can try harder to locate the element as a scalar. Examples: it
  // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
  assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
         "Shuffle mask value must be from operand 0");
  if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
    return SDValue();

  // The insert's index (operand 2) must be a constant equal to the mask value,
  // i.e. the shuffle reads exactly the element the insert wrote.
  auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
  if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
    return SDValue();

  // There's an existing insertelement with constant insertion index, so we
  // don't need to check the legality/profitability of a replacement operation
  // that differs at most in the constant value. The target should be able to
  // lower any of those in a similar way. If not, legalization will expand this
  // to a scalar-to-vector plus shuffle.
  //
  // Note that the shuffle may move the scalar from the position that the insert
  // element used. Therefore, our new insert element occurs at the shuffle's
  // mask index value, not the insert's index value.
  // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
  SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
                     Op1, Op0.getOperand(1), NewInsIndex);
}
19261 
19262 /// If we have a unary shuffle of a shuffle, see if it can be folded away
19263 /// completely. This has the potential to lose undef knowledge because the first
19264 /// shuffle may not have an undef mask element where the second one does. So
19265 /// only call this after doing simplifications based on demanded elements.
19266 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
19267   // shuf (shuf0 X, Y, Mask0), undef, Mask
19268   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
19269   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
19270     return SDValue();
19271 
19272   ArrayRef<int> Mask = Shuf->getMask();
19273   ArrayRef<int> Mask0 = Shuf0->getMask();
19274   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
19275     // Ignore undef elements.
19276     if (Mask[i] == -1)
19277       continue;
19278     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
19279 
19280     // Is the element of the shuffle operand chosen by this shuffle the same as
19281     // the element chosen by the shuffle operand itself?
19282     if (Mask0[Mask[i]] != Mask0[i])
19283       return SDValue();
19284   }
19285   // Every element of this shuffle is identical to the result of the previous
19286   // shuffle, so we can replace this value.
19287   return Shuf->getOperand(0);
19288 }
19289 
19290 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
19291   EVT VT = N->getValueType(0);
19292   unsigned NumElts = VT.getVectorNumElements();
19293 
19294   SDValue N0 = N->getOperand(0);
19295   SDValue N1 = N->getOperand(1);
19296 
19297   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
19298 
19299   // Canonicalize shuffle undef, undef -> undef
19300   if (N0.isUndef() && N1.isUndef())
19301     return DAG.getUNDEF(VT);
19302 
19303   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
19304 
19305   // Canonicalize shuffle v, v -> v, undef
19306   if (N0 == N1) {
19307     SmallVector<int, 8> NewMask;
19308     for (unsigned i = 0; i != NumElts; ++i) {
19309       int Idx = SVN->getMaskElt(i);
19310       if (Idx >= (int)NumElts) Idx -= NumElts;
19311       NewMask.push_back(Idx);
19312     }
19313     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
19314   }
19315 
19316   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
19317   if (N0.isUndef())
19318     return DAG.getCommutedVectorShuffle(*SVN);
19319 
19320   // Remove references to rhs if it is undef
19321   if (N1.isUndef()) {
19322     bool Changed = false;
19323     SmallVector<int, 8> NewMask;
19324     for (unsigned i = 0; i != NumElts; ++i) {
19325       int Idx = SVN->getMaskElt(i);
19326       if (Idx >= (int)NumElts) {
19327         Idx = -1;
19328         Changed = true;
19329       }
19330       NewMask.push_back(Idx);
19331     }
19332     if (Changed)
19333       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
19334   }
19335 
19336   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
19337     return InsElt;
19338 
19339   // A shuffle of a single vector that is a splatted value can always be folded.
19340   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
19341     return V;
19342 
19343   // If it is a splat, check if the argument vector is another splat or a
19344   // build_vector.
19345   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
19346     int SplatIndex = SVN->getSplatIndex();
19347     if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
19348         TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
19349       // splat (vector_bo L, R), Index -->
19350       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
19351       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
19352       SDLoc DL(N);
19353       EVT EltVT = VT.getScalarType();
19354       SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
19355       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
19356       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
19357       SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
19358                                   N0.getNode()->getFlags());
19359       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
19360       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
19361       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
19362     }
19363 
19364     // If this is a bit convert that changes the element type of the vector but
19365     // not the number of vector elements, look through it.  Be careful not to
19366     // look though conversions that change things like v4f32 to v2f64.
19367     SDNode *V = N0.getNode();
19368     if (V->getOpcode() == ISD::BITCAST) {
19369       SDValue ConvInput = V->getOperand(0);
19370       if (ConvInput.getValueType().isVector() &&
19371           ConvInput.getValueType().getVectorNumElements() == NumElts)
19372         V = ConvInput.getNode();
19373     }
19374 
19375     if (V->getOpcode() == ISD::BUILD_VECTOR) {
19376       assert(V->getNumOperands() == NumElts &&
19377              "BUILD_VECTOR has wrong number of operands");
19378       SDValue Base;
19379       bool AllSame = true;
19380       for (unsigned i = 0; i != NumElts; ++i) {
19381         if (!V->getOperand(i).isUndef()) {
19382           Base = V->getOperand(i);
19383           break;
19384         }
19385       }
19386       // Splat of <u, u, u, u>, return <u, u, u, u>
19387       if (!Base.getNode())
19388         return N0;
19389       for (unsigned i = 0; i != NumElts; ++i) {
19390         if (V->getOperand(i) != Base) {
19391           AllSame = false;
19392           break;
19393         }
19394       }
19395       // Splat of <x, x, x, x>, return <x, x, x, x>
19396       if (AllSame)
19397         return N0;
19398 
19399       // Canonicalize any other splat as a build_vector.
19400       SDValue Splatted = V->getOperand(SplatIndex);
19401       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
19402       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
19403 
19404       // We may have jumped through bitcasts, so the type of the
19405       // BUILD_VECTOR may not match the type of the shuffle.
19406       if (V->getValueType(0) != VT)
19407         NewBV = DAG.getBitcast(VT, NewBV);
19408       return NewBV;
19409     }
19410   }
19411 
19412   // Simplify source operands based on shuffle mask.
19413   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
19414     return SDValue(N, 0);
19415 
19416   // This is intentionally placed after demanded elements simplification because
19417   // it could eliminate knowledge of undef elements created by this shuffle.
19418   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
19419     return ShufOp;
19420 
19421   // Match shuffles that can be converted to any_vector_extend_in_reg.
19422   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
19423     return V;
19424 
19425   // Combine "truncate_vector_in_reg" style shuffles.
19426   if (SDValue V = combineTruncationShuffle(SVN, DAG))
19427     return V;
19428 
19429   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
19430       Level < AfterLegalizeVectorOps &&
19431       (N1.isUndef() ||
19432       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
19433        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
19434     if (SDValue V = partitionShuffleOfConcats(N, DAG))
19435       return V;
19436   }
19437 
19438   // A shuffle of a concat of the same narrow vector can be reduced to use
19439   // only low-half elements of a concat with undef:
19440   // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
19441   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
19442       N0.getNumOperands() == 2 &&
19443       N0.getOperand(0) == N0.getOperand(1)) {
19444     int HalfNumElts = (int)NumElts / 2;
19445     SmallVector<int, 8> NewMask;
19446     for (unsigned i = 0; i != NumElts; ++i) {
19447       int Idx = SVN->getMaskElt(i);
19448       if (Idx >= HalfNumElts) {
19449         assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
19450         Idx -= HalfNumElts;
19451       }
19452       NewMask.push_back(Idx);
19453     }
19454     if (TLI.isShuffleMaskLegal(NewMask, VT)) {
19455       SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
19456       SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
19457                                    N0.getOperand(0), UndefVec);
19458       return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
19459     }
19460   }
19461 
19462   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
19463   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
19464   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
19465     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
19466       return Res;
19467 
19468   // If this shuffle only has a single input that is a bitcasted shuffle,
19469   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
19470   // back to their original types.
19471   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
19472       N1.isUndef() && Level < AfterLegalizeVectorOps &&
19473       TLI.isTypeLegal(VT)) {
19474     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
19475       if (Scale == 1)
19476         return SmallVector<int, 8>(Mask.begin(), Mask.end());
19477 
19478       SmallVector<int, 8> NewMask;
19479       for (int M : Mask)
19480         for (int s = 0; s != Scale; ++s)
19481           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
19482       return NewMask;
19483     };
19484 
19485     SDValue BC0 = peekThroughOneUseBitcasts(N0);
19486     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
19487       EVT SVT = VT.getScalarType();
19488       EVT InnerVT = BC0->getValueType(0);
19489       EVT InnerSVT = InnerVT.getScalarType();
19490 
19491       // Determine which shuffle works with the smaller scalar type.
19492       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
19493       EVT ScaleSVT = ScaleVT.getScalarType();
19494 
19495       if (TLI.isTypeLegal(ScaleVT) &&
19496           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
19497           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
19498         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
19499         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
19500 
19501         // Scale the shuffle masks to the smaller scalar type.
19502         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
19503         SmallVector<int, 8> InnerMask =
19504             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
19505         SmallVector<int, 8> OuterMask =
19506             ScaleShuffleMask(SVN->getMask(), OuterScale);
19507 
19508         // Merge the shuffle masks.
19509         SmallVector<int, 8> NewMask;
19510         for (int M : OuterMask)
19511           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
19512 
19513         // Test for shuffle mask legality over both commutations.
19514         SDValue SV0 = BC0->getOperand(0);
19515         SDValue SV1 = BC0->getOperand(1);
19516         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
19517         if (!LegalMask) {
19518           std::swap(SV0, SV1);
19519           ShuffleVectorSDNode::commuteMask(NewMask);
19520           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
19521         }
19522 
19523         if (LegalMask) {
19524           SV0 = DAG.getBitcast(ScaleVT, SV0);
19525           SV1 = DAG.getBitcast(ScaleVT, SV1);
19526           return DAG.getBitcast(
19527               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
19528         }
19529       }
19530     }
19531   }
19532 
19533   // Canonicalize shuffles according to rules:
19534   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
19535   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
19536   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
19537   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
19538       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
19539       TLI.isTypeLegal(VT)) {
19540     // The incoming shuffle must be of the same type as the result of the
19541     // current shuffle.
19542     assert(N1->getOperand(0).getValueType() == VT &&
19543            "Shuffle types don't match");
19544 
19545     SDValue SV0 = N1->getOperand(0);
19546     SDValue SV1 = N1->getOperand(1);
19547     bool HasSameOp0 = N0 == SV0;
19548     bool IsSV1Undef = SV1.isUndef();
19549     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
19550       // Commute the operands of this shuffle so that next rule
19551       // will trigger.
19552       return DAG.getCommutedVectorShuffle(*SVN);
19553   }
19554 
19555   // Try to fold according to rules:
19556   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19557   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19558   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19559   // Don't try to fold shuffles with illegal type.
19560   // Only fold if this shuffle is the only user of the other shuffle.
19561   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
19562       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
19563     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
19564 
19565     // Don't try to fold splats; they're likely to simplify somehow, or they
19566     // might be free.
19567     if (OtherSV->isSplat())
19568       return SDValue();
19569 
19570     // The incoming shuffle must be of the same type as the result of the
19571     // current shuffle.
19572     assert(OtherSV->getOperand(0).getValueType() == VT &&
19573            "Shuffle types don't match");
19574 
19575     SDValue SV0, SV1;
19576     SmallVector<int, 4> Mask;
19577     // Compute the combined shuffle mask for a shuffle with SV0 as the first
19578     // operand, and SV1 as the second operand.
19579     for (unsigned i = 0; i != NumElts; ++i) {
19580       int Idx = SVN->getMaskElt(i);
19581       if (Idx < 0) {
19582         // Propagate Undef.
19583         Mask.push_back(Idx);
19584         continue;
19585       }
19586 
19587       SDValue CurrentVec;
19588       if (Idx < (int)NumElts) {
19589         // This shuffle index refers to the inner shuffle N0. Lookup the inner
19590         // shuffle mask to identify which vector is actually referenced.
19591         Idx = OtherSV->getMaskElt(Idx);
19592         if (Idx < 0) {
19593           // Propagate Undef.
19594           Mask.push_back(Idx);
19595           continue;
19596         }
19597 
19598         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
19599                                            : OtherSV->getOperand(1);
19600       } else {
19601         // This shuffle index references an element within N1.
19602         CurrentVec = N1;
19603       }
19604 
19605       // Simple case where 'CurrentVec' is UNDEF.
19606       if (CurrentVec.isUndef()) {
19607         Mask.push_back(-1);
19608         continue;
19609       }
19610 
19611       // Canonicalize the shuffle index. We don't know yet if CurrentVec
19612       // will be the first or second operand of the combined shuffle.
19613       Idx = Idx % NumElts;
19614       if (!SV0.getNode() || SV0 == CurrentVec) {
19615         // Ok. CurrentVec is the left hand side.
19616         // Update the mask accordingly.
19617         SV0 = CurrentVec;
19618         Mask.push_back(Idx);
19619         continue;
19620       }
19621 
19622       // Bail out if we cannot convert the shuffle pair into a single shuffle.
19623       if (SV1.getNode() && SV1 != CurrentVec)
19624         return SDValue();
19625 
19626       // Ok. CurrentVec is the right hand side.
19627       // Update the mask accordingly.
19628       SV1 = CurrentVec;
19629       Mask.push_back(Idx + NumElts);
19630     }
19631 
19632     // Check if all indices in Mask are Undef. In case, propagate Undef.
19633     bool isUndefMask = true;
19634     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
19635       isUndefMask &= Mask[i] < 0;
19636 
19637     if (isUndefMask)
19638       return DAG.getUNDEF(VT);
19639 
19640     if (!SV0.getNode())
19641       SV0 = DAG.getUNDEF(VT);
19642     if (!SV1.getNode())
19643       SV1 = DAG.getUNDEF(VT);
19644 
19645     // Avoid introducing shuffles with illegal mask.
19646     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19647     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19648     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19649     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
19650     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
19651     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
19652     return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
19653   }
19654 
19655   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
19656     return V;
19657 
19658   return SDValue();
19659 }
19660 
/// Combine a SCALAR_TO_VECTOR node. Currently this only recognizes the
/// pattern SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V, C0)) and tries to turn it
/// back into a shuffle of V (possibly narrowed), avoiding the round trip
/// through a scalar register.
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE and possible truncate.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);
    auto InVecT = InVec.getValueType();
    // Only a constant extract index can be rewritten as a shuffle mask.
    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
      // Mask that moves element Elt of InVec into lane 0; all other lanes
      // are undef.
      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      NewMask[0] = Elt;
      // If the extracted scalar is implicitly truncated by this node (the
      // result element type is narrower than the extracted integer), make
      // the truncate explicit as long as the narrower type is legal; the
      // rewritten SCALAR_TO_VECTOR may then be revisited by this combine.
      if (VT.getScalarType() != InVal.getValueType() &&
          InVal.getValueType().isScalarInteger() &&
          isTypeLegal(VT.getScalarType())) {
        SDValue Val =
            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
      }
      // Otherwise, require matching element types and a source vector at
      // least as wide as the result so a shuffle (plus optional subvector
      // extract) can produce the result type.
      if (VT.getScalarType() == InVecT.getScalarType() &&
          VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
        SDValue LegalShuffle =
          TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
                                      DAG.getUNDEF(InVecT), NewMask, DAG);
        if (LegalShuffle) {
          // If the initial vector is the correct size this shuffle is a
          // valid result.
          if (VT == InVecT)
            return LegalShuffle;
          // If not we must truncate the vector.
          if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
            SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
            EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
                                         InVecT.getVectorElementType(),
                                         VT.getVectorNumElements());
            return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
                               LegalShuffle, ZeroIdx);
          }
        }
      }
    }
  }

  return SDValue();
}
19710 
/// Combine an INSERT_SUBVECTOR node: insert subvector N1 into vector N0 at
/// constant element index N2. Applies a series of independent folds; the
/// first one that matches returns the replacement value.
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0); // Vector being inserted into.
  SDValue N1 = N->getOperand(1); // Subvector to insert.
  SDValue N2 = N->getOperand(2); // Insertion index (in elements).

  // If inserting an UNDEF, just return the original vector.
  if (N1.isUndef())
    return N0;

  // If this is an insert of an extracted vector into an undef vector, we can
  // just use the input to the extract.
  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
    return N1.getOperand(0);

  // If we are inserting a bitcast value into an undef, with the same
  // number of elements, just use the bitcast input of the extract.
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(0).getOperand(1) == N2 &&
      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
          VT.getVectorNumElements() &&
      N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
          VT.getSizeInBits()) {
    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
  }

  // If both N1 and N2 are bitcast values on which insert_subvector
  // would make sense, pull the bitcast through.
  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
    SDValue CN0 = N0.getOperand(0);
    SDValue CN1 = N1.getOperand(0);
    EVT CN0VT = CN0.getValueType();
    EVT CN1VT = CN1.getValueType();
    // Requires matching element types and an unchanged element count so the
    // index N2 keeps the same meaning on the pre-bitcast types.
    if (CN0VT.isVector() && CN1VT.isVector() &&
        CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
        CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
                                      CN0.getValueType(), CN0, CN1, N2);
      return DAG.getBitcast(VT, NewINSERT);
    }
  }

  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N0.getOperand(1).getValueType() == N1.getValueType() &&
      N0.getOperand(2) == N2)
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
                       N1, N2);

  // Eliminate an intermediate insert into an undef vector:
  // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
  // insert_subvector undef, X, N2
  if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
                       N1.getOperand(1), N2);

  // The remaining folds all require a constant insertion index.
  if (!isa<ConstantSDNode>(N2))
    return SDValue();

  uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();

  // Push subvector bitcasts to the output, adjusting the index as we go.
  // insert_subvector(bitcast(v), bitcast(s), c1)
  // -> bitcast(insert_subvector(v, s, c2))
  if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
      N1.getOpcode() == ISD::BITCAST) {
    SDValue N0Src = peekThroughBitcasts(N0);
    SDValue N1Src = peekThroughBitcasts(N1);
    EVT N0SrcSVT = N0Src.getValueType().getScalarType();
    EVT N1SrcSVT = N1Src.getValueType().getScalarType();
    if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
        N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
      EVT NewVT;
      SDLoc DL(N);
      SDValue NewIdx;
      LLVMContext &Ctx = *DAG.getContext();
      unsigned NumElts = VT.getVectorNumElements();
      unsigned EltSizeInBits = VT.getScalarSizeInBits();
      // Scale the element count and index by the ratio of element sizes.
      // When the source elements are narrower, multiply; when wider, divide
      // (only if the index and count divide evenly).
      if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
        unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
        NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
        NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
      } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
        unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
        if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
          NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
          NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
        }
      }
      if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
        SDValue Res = DAG.getBitcast(NewVT, N0Src);
        Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
        return DAG.getBitcast(VT, Res);
      }
    }
  }

  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, Idx0), Idx1)
  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
      N1.getValueType() == N0.getOperand(1).getValueType() &&
      isa<ConstantSDNode>(N0.getOperand(2))) {
    unsigned OtherIdx = N0.getConstantOperandVal(2);
    // Canonical order: insert at the larger index on the outside.
    if (InsIdx < OtherIdx) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
                                  N0.getOperand(0), N1, N2);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
    }
  }

  // If the input vector is a concatenation, and the insert replaces
  // one of the pieces, we can optimize into a single concat_vectors.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
      N0.getOperand(0).getValueType() == N1.getValueType()) {
    unsigned Factor = N1.getValueType().getVectorNumElements();

    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
    Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;

    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }

  // Simplify source operands based on insertion.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
19853 
19854 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
19855   SDValue N0 = N->getOperand(0);
19856 
19857   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
19858   if (N0->getOpcode() == ISD::FP16_TO_FP)
19859     return N0->getOperand(0);
19860 
19861   return SDValue();
19862 }
19863 
19864 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
19865   SDValue N0 = N->getOperand(0);
19866 
19867   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
19868   if (N0->getOpcode() == ISD::AND) {
19869     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
19870     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
19871       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
19872                          N0.getOperand(0));
19873     }
19874   }
19875 
19876   return SDValue();
19877 }
19878 
19879 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
19880   SDValue N0 = N->getOperand(0);
19881   EVT VT = N0.getValueType();
19882   unsigned Opcode = N->getOpcode();
19883 
19884   // VECREDUCE over 1-element vector is just an extract.
19885   if (VT.getVectorNumElements() == 1) {
19886     SDLoc dl(N);
19887     SDValue Res =
19888         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
19889                     DAG.getVectorIdxConstant(0, dl));
19890     if (Res.getValueType() != N->getValueType(0))
19891       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
19892     return Res;
19893   }
19894 
19895   // On an boolean vector an and/or reduction is the same as a umin/umax
19896   // reduction. Convert them if the latter is legal while the former isn't.
19897   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
19898     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
19899         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
19900     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
19901         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
19902         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
19903       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
19904   }
19905 
19906   return SDValue();
19907 }
19908 
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");

  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = peekThroughBitcasts(N->getOperand(1));
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  // The mask operand must be a (possibly bitcast) build_vector of constants.
  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  // Returns an empty SDValue if any sub-element constant is mixed (neither
  // all-zeros nor all-ones) or the target rejects the resulting mask.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;   // Original build_vector operand.
      int SubIdx = i % Split;   // Position of the sub-element within it.
      SDValue Elt = RHS.getOperand(EltIdx);
      // X & undef --> 0 (not undef). So this lane must be converted to choose
      // from the zero constant vector (same as if the element had all 0-bits).
      if (Elt.isUndef()) {
        Indices.push_back(i + NumSubElts);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      // On big-endian targets the first sub-element holds the highest bits.
      if (DAG.getDataLayout().isBigEndian())
        Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
      else
        Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);

      // All-ones keeps the LHS lane; all-zeros selects from the zero vector.
      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  // Try whole elements first (Split == 1), then progressively finer
  // sub-element granularity down to bytes.
  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
19997 
/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
/// bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT EltVT = VT.getVectorElementType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // TODO: Remove/replace the extract cost check? If the elements are available
  //       as scalars, then there may be no extract cost. Should we ask if
  //       inserting a scalar back into a vector is cheap instead?
  // Both operands must be splats of the same lane, with element types that
  // match the result, and the scalar op and the extract must be cheap/legal.
  int Index0, Index1;
  SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
  SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
  if (!Src0 || !Src1 || Index0 != Index1 ||
      Src0.getValueType().getVectorElementType() != EltVT ||
      Src1.getValueType().getVectorElementType() != EltVT ||
      !TLI.isExtractVecEltCheap(VT, Index0) ||
      !TLI.isOperationLegalOrCustom(Opcode, EltVT))
    return SDValue();

  // Extract the splatted lane from each operand and perform the op on the
  // scalars, preserving the original node's flags (e.g. fast-math).
  SDLoc DL(N);
  SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
  SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
  SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
  SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());

  // If all lanes but 1 are undefined, no need to splat the scalar result.
  // TODO: Keep track of undefs and use that info in the general case.
  if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
      count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
      count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
    // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
    // build_vec ..undef, (bo X, Y), undef...
    SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
    Ops[Index0] = ScalarBO;
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
  SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
  return DAG.getBuildVector(VT, DL, Ops);
}
20043 
20044 /// Visit a binary vector operation, like ADD.
20045 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
20046   assert(N->getValueType(0).isVector() &&
20047          "SimplifyVBinOp only works on vectors!");
20048 
20049   SDValue LHS = N->getOperand(0);
20050   SDValue RHS = N->getOperand(1);
20051   SDValue Ops[] = {LHS, RHS};
20052   EVT VT = N->getValueType(0);
20053   unsigned Opcode = N->getOpcode();
20054 
20055   // See if we can constant fold the vector operation.
20056   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
20057           Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
20058     return Fold;
20059 
20060   // Move unary shuffles with identical masks after a vector binop:
20061   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
20062   //   --> shuffle (VBinOp A, B), Undef, Mask
20063   // This does not require type legality checks because we are creating the
20064   // same types of operations that are in the original sequence. We do have to
20065   // restrict ops like integer div that have immediate UB (eg, div-by-zero)
20066   // though. This code is adapted from the identical transform in instcombine.
20067   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
20068       Opcode != ISD::UREM && Opcode != ISD::SREM &&
20069       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
20070     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
20071     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
20072     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
20073         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
20074         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
20075       SDLoc DL(N);
20076       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
20077                                      RHS.getOperand(0), N->getFlags());
20078       SDValue UndefV = LHS.getOperand(1);
20079       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
20080     }
20081   }
20082 
20083   // The following pattern is likely to emerge with vector reduction ops. Moving
20084   // the binary operation ahead of insertion may allow using a narrower vector
20085   // instruction that has better performance than the wide version of the op:
20086   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
20087   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
20088       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
20089       LHS.getOperand(2) == RHS.getOperand(2) &&
20090       (LHS.hasOneUse() || RHS.hasOneUse())) {
20091     SDValue X = LHS.getOperand(1);
20092     SDValue Y = RHS.getOperand(1);
20093     SDValue Z = LHS.getOperand(2);
20094     EVT NarrowVT = X.getValueType();
20095     if (NarrowVT == Y.getValueType() &&
20096         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
20097       // (binop undef, undef) may not return undef, so compute that result.
20098       SDLoc DL(N);
20099       SDValue VecC =
20100           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
20101       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
20102       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
20103     }
20104   }
20105 
20106   // Make sure all but the first op are undef or constant.
20107   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
20108     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
20109            std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
20110                      [](const SDValue &Op) {
20111                        return Op.isUndef() ||
20112                               ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
20113                      });
20114   };
20115 
20116   // The following pattern is likely to emerge with vector reduction ops. Moving
20117   // the binary operation ahead of the concat may allow using a narrower vector
20118   // instruction that has better performance than the wide version of the op:
20119   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
20120   //   concat (VBinOp X, Y), VecC
20121   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
20122       (LHS.hasOneUse() || RHS.hasOneUse())) {
20123     EVT NarrowVT = LHS.getOperand(0).getValueType();
20124     if (NarrowVT == RHS.getOperand(0).getValueType() &&
20125         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
20126       SDLoc DL(N);
20127       unsigned NumOperands = LHS.getNumOperands();
20128       SmallVector<SDValue, 4> ConcatOps;
20129       for (unsigned i = 0; i != NumOperands; ++i) {
20130         // This constant fold for operands 1 and up.
20131         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
20132                                         RHS.getOperand(i)));
20133       }
20134 
20135       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
20136     }
20137   }
20138 
20139   if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
20140     return V;
20141 
20142   return SDValue();
20143 }
20144 
20145 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
20146                                     SDValue N2) {
20147   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
20148 
20149   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
20150                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
20151 
20152   // If we got a simplified select_cc node back from SimplifySelectCC, then
20153   // break it down into a new SETCC node, and a new SELECT node, and then return
20154   // the SELECT node, since we were called with a SELECT node.
20155   if (SCC.getNode()) {
20156     // Check to see if we got a select_cc back (to turn into setcc/select).
20157     // Otherwise, just return whatever node we got back, like fabs.
20158     if (SCC.getOpcode() == ISD::SELECT_CC) {
20159       const SDNodeFlags Flags = N0.getNode()->getFlags();
20160       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
20161                                   N0.getValueType(),
20162                                   SCC.getOperand(0), SCC.getOperand(1),
20163                                   SCC.getOperand(4), Flags);
20164       AddToWorklist(SETCC.getNode());
20165       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
20166                                          SCC.getOperand(2), SCC.getOperand(3));
20167       SelectNode->setFlags(Flags);
20168       return SelectNode;
20169     }
20170 
20171     return SCC;
20172   }
20173   return SDValue();
20174 }
20175 
/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
/// being selected between, see if we can simplify the select.  Callers of this
/// should assume that TheSelect is deleted if this returns true.  As such, they
/// should return the appropriate thing (e.g. the node) back to the top-level of
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {
  // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
  // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
    if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
      SDValue Sqrt = RHS;
      ISD::CondCode CC;
      SDValue CmpLHS;
      const ConstantFPSDNode *Zero = nullptr;

      // Extract the condition code, compared value and RHS constant from
      // either select form (SELECT_CC carries them inline; SELECT/VSELECT
      // carry them in a SETCC condition operand).
      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
        CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
        CmpLHS = TheSelect->getOperand(0);
        Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
      } else {
        // SELECT or VSELECT
        SDValue Cmp = TheSelect->getOperand(0);
        if (Cmp.getOpcode() == ISD::SETCC) {
          CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
          CmpLHS = Cmp.getOperand(0);
          Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
        }
      }
      // Only fold for a compare of x against (+/-)0.0 with a less-than
      // predicate, where x is also the fsqrt operand.
      if (Zero && Zero->isZero() &&
          Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
          CC == ISD::SETULT || CC == ISD::SETLT)) {
        // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
        CombineTo(TheSelect, Sqrt);
        return true;
      }
    }
  }
  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        // Be conservative for atomics for the moment
        // TODO: This does appear to be legal for unordered atomics (see D66309)
        !LLD->isSimple() || !RLD->isSimple() ||
        // FIXME: If either is a pre/post inc/dec load,
        // we'd need to split out the address adjustment.
        LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information.  This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations.  Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0 ||
        // We can't produce a CMOV of a TargetFrameIndex since we won't
        // generate the address generation required.
        LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
        RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
                                      LLD->getBasePtr().getValueType()))
      return false;

    // The loads must not depend on one another.
    if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
      return false;

    // Check that the select condition doesn't reach either load.  If so,
    // folding this will induce a cycle into the DAG.  If not, this is safe to
    // xform, so create a select of the addresses.

    SmallPtrSet<const SDNode *, 32> Visited;
    SmallVector<const SDNode *, 16> Worklist;

    // Always fail if LLD and RLD are not independent. TheSelect is a
    // predecessor to all Nodes in question so we need not search past it.

    Visited.insert(TheSelect);
    Worklist.push_back(LLD);
    Worklist.push_back(RLD);

    if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
        SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
      return false;

    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      // We cannot do this optimization if any pair of {RLD, LLD} is a
      // predecessor to {RLD, LLD, CondNode}. As we've already compared the
      // Loads, we only need to check if CondNode is a successor to one of the
      // loads. We can further avoid this if there's no use of their chain
      // value.
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      Worklist.push_back(CondNode);

      // Note: Visited/Worklist deliberately carry over from the queries
      // above, so already-explored nodes are not re-visited here.
      if ((LLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
          (RLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
        return false;

      Addr = DAG.getSelect(SDLoc(TheSelect),
                           LLD->getBasePtr().getValueType(),
                           TheSelect->getOperand(0), LLD->getBasePtr(),
                           RLD->getBasePtr());
    } else {  // Otherwise SELECT_CC
      // We cannot do this optimization if any pair of {RLD, LLD} is a
      // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
      // the Loads, we only need to check if CondLHS/CondRHS is a successor to
      // one of the loads. We can further avoid this if there's no use of their
      // chain value.

      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();
      Worklist.push_back(CondLHS);
      Worklist.push_back(CondRHS);

      if ((LLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
          (RLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
        return false;

      Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must be the minimum (most restrictive) alignment of the
    // inputs.
    unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
    // Likewise, the merged load may only keep memory-operand flags that hold
    // for BOTH inputs, so drop invariant/dereferenceable if RLD lacks them.
    MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
    if (!RLD->isInvariant())
      MMOFlags &= ~MachineMemOperand::MOInvariant;
    if (!RLD->isDereferenceable())
      MMOFlags &= ~MachineMemOperand::MODereferenceable;
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      // FIXME: Discards pointer and AA info.
      Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
                         LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
                         MMOFlags);
    } else {
      // FIXME: Discards pointer and AA info.
      // If LLD is an anyext load, use RLD's extension kind (the earlier
      // checks guarantee the pair is compatible).
      Load = DAG.getExtLoad(
          LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
                                                  : LLD->getExtensionType(),
          SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
          MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain.  We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}
20367 
20368 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
20369 /// bitwise 'and'.
20370 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
20371                                             SDValue N1, SDValue N2, SDValue N3,
20372                                             ISD::CondCode CC) {
20373   // If this is a select where the false operand is zero and the compare is a
20374   // check of the sign bit, see if we can perform the "gzip trick":
20375   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
20376   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
20377   EVT XType = N0.getValueType();
20378   EVT AType = N2.getValueType();
20379   if (!isNullConstant(N3) || !XType.bitsGE(AType))
20380     return SDValue();
20381 
20382   // If the comparison is testing for a positive value, we have to invert
20383   // the sign bit mask, so only do that transform if the target has a bitwise
20384   // 'and not' instruction (the invert is free).
20385   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
20386     // (X > -1) ? A : 0
20387     // (X >  0) ? X : 0 <-- This is canonical signed max.
20388     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
20389       return SDValue();
20390   } else if (CC == ISD::SETLT) {
20391     // (X <  0) ? A : 0
20392     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
20393     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
20394       return SDValue();
20395   } else {
20396     return SDValue();
20397   }
20398 
20399   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
20400   // constant.
20401   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
20402   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
20403   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
20404     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
20405     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
20406       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
20407       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
20408       AddToWorklist(Shift.getNode());
20409 
20410       if (XType.bitsGT(AType)) {
20411         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
20412         AddToWorklist(Shift.getNode());
20413       }
20414 
20415       if (CC == ISD::SETGT)
20416         Shift = DAG.getNOT(DL, Shift, AType);
20417 
20418       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
20419     }
20420   }
20421 
20422   unsigned ShCt = XType.getSizeInBits() - 1;
20423   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
20424     return SDValue();
20425 
20426   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
20427   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
20428   AddToWorklist(Shift.getNode());
20429 
20430   if (XType.bitsGT(AType)) {
20431     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
20432     AddToWorklist(Shift.getNode());
20433   }
20434 
20435   if (CC == ISD::SETGT)
20436     Shift = DAG.getNOT(DL, Shift, AType);
20437 
20438   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
20439 }
20440 
/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
/// in it. This may be a win when the constant is not otherwise available
/// because it replaces two constant pool loads with one.
/// Returns the new load, or an empty SDValue if the transform does not apply.
SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
    const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
    ISD::CondCode CC) {
  // The target must opt in for this FP type.
  if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
    return SDValue();

  // If we are before legalize types, we want the other legalization to happen
  // first (for example, to avoid messing with soft float).
  auto *TV = dyn_cast<ConstantFPSDNode>(N2);
  auto *FV = dyn_cast<ConstantFPSDNode>(N3);
  EVT VT = N2.getValueType();
  if (!TV || !FV || !TLI.isTypeLegal(VT))
    return SDValue();

  // If a constant can be materialized without loads, this does not make sense.
  if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
      TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
      TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
    return SDValue();

  // If both constants have multiple uses, then we won't need to do an extra
  // load. The values are likely around in registers for other users.
  if (!TV->hasOneUse() && !FV->hasOneUse())
    return SDValue();

  // Element 0 is the false value and element 1 the true value: the select
  // below yields offset EltSize (element 1) when the condition is true.
  Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
                       const_cast<ConstantFP*>(TV->getConstantFPValue()) };
  Type *FPTy = Elts[0]->getType();
  const DataLayout &TD = DAG.getDataLayout();

  // Create a ConstantArray of the two constants.
  Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
  SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
                                      TD.getPrefTypeAlignment(FPTy));
  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

  // Get offsets to the 0 and 1 elements of the array, so we can select between
  // them.
  SDValue Zero = DAG.getIntPtrConstant(0, DL);
  unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
  SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
  // Offset = Cond ? EltSize : 0, then load from &CA[0] + Offset.
  SDValue Cond =
      DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
  AddToWorklist(Cond.getNode());
  SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
  AddToWorklist(CstOffset.getNode());
  CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
  AddToWorklist(CPIdx.getNode());
  return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
                     MachinePointerInfo::getConstantPool(
                         DAG.getMachineFunction()), Alignment);
}
20497 
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
/// \param NotExtCompare if true, do not return a plain zero-extended setcc
///        for the select-of-1/0 case (see below).
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT CmpOpVT = N0.getValueType();
  EVT CmpResVT = getSetCCResultType(CmpOpVT);
  EVT VT = N2.getValueType();
  auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());

  // Determine if the condition we're dealing with is constant.
  if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
    AddToWorklist(SCC.getNode());
    if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
      // fold select_cc true, x, y -> x
      // fold select_cc false, x, y -> y
      return !(SCCC->isNullValue()) ? N2 : N3;
    }
  }

  if (SDValue V =
          convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
    return V;

  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
    return V;

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      unsigned ShCt = AndMask.getBitWidth() - 1;
      if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
        SDValue ShlAmt =
          DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                          getShiftAmountTy(AndLHS.getValueType()));
        SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

        // Now arithmetic right shift it all the way over, so the result is
        // either all-ones, or zero.
        SDValue ShrAmt =
          DAG.getConstant(ShCt, SDLoc(Shl),
                          getShiftAmountTy(Shl.getValueType()));
        SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

        return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
      }
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
  bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();

  // This requires booleans that are known to be exactly 0 or 1, since the
  // result is built by zero-extending (and possibly shifting) the setcc.
  if ((Fold || Swap) &&
      TLI.getBooleanContents(CmpOpVT) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {

    if (Swap) {
      // Invert the condition so the power-of-2 constant is selected on
      // 'true'. Note only the constant pointers are swapped; from here on
      // N2C names that power-of-2 constant.
      CC = ISD::getSetCCInverse(CC, CmpOpVT);
      std::swap(N2C, N3C);
    }

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    SDValue Temp, SCC;
    // zext (setcc n0, n1)
    if (LegalTypes) {
      SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
      if (VT.bitsLT(SCC.getValueType()))
        Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
      else
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    } else {
      // Before type legalization we can use i1 for the setcc result directly.
      SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
      Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    }

    AddToWorklist(SCC.getNode());
    AddToWorklist(Temp.getNode());

    // select C, 1, 0 is just the zero-extended compare itself.
    if (N2C->isOne())
      return Temp;

    unsigned ShCt = N2C->getAPIntValue().logBase2();
    if (TLI.shouldAvoidTransformToShift(VT, ShCt))
      return SDValue();

    // shl setcc result by log2 n2c
    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
                       DAG.getConstant(ShCt, SDLoc(Temp),
                                       getShiftAmountTy(Temp.getValueType())));
  }

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of E, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  return SDValue();
}
20647 
20648 /// This is a stub for TargetLowering::SimplifySetCC.
20649 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
20650                                    ISD::CondCode Cond, const SDLoc &DL,
20651                                    bool foldBooleans) {
20652   TargetLowering::DAGCombinerInfo
20653     DagCombineInfo(DAG, Level, false, this);
20654   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
20655 }
20656 
20657 /// Given an ISD::SDIV node expressing a divide by constant, return
20658 /// a DAG expression to select that will generate the same value by multiplying
20659 /// by a magic number.
20660 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
20661 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
20662   // when optimising for minimum size, we don't want to expand a div to a mul
20663   // and a shift.
20664   if (DAG.getMachineFunction().getFunction().hasMinSize())
20665     return SDValue();
20666 
20667   SmallVector<SDNode *, 8> Built;
20668   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
20669     for (SDNode *N : Built)
20670       AddToWorklist(N);
20671     return S;
20672   }
20673 
20674   return SDValue();
20675 }
20676 
20677 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
20678 /// DAG expression that will generate the same value by right shifting.
20679 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
20680   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
20681   if (!C)
20682     return SDValue();
20683 
20684   // Avoid division by zero.
20685   if (C->isNullValue())
20686     return SDValue();
20687 
20688   SmallVector<SDNode *, 8> Built;
20689   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
20690     for (SDNode *N : Built)
20691       AddToWorklist(N);
20692     return S;
20693   }
20694 
20695   return SDValue();
20696 }
20697 
20698 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
20699 /// expression that will generate the same value by multiplying by a magic
20700 /// number.
20701 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
20702 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
20703   // when optimising for minimum size, we don't want to expand a div to a mul
20704   // and a shift.
20705   if (DAG.getMachineFunction().getFunction().hasMinSize())
20706     return SDValue();
20707 
20708   SmallVector<SDNode *, 8> Built;
20709   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
20710     for (SDNode *N : Built)
20711       AddToWorklist(N);
20712     return S;
20713   }
20714 
20715   return SDValue();
20716 }
20717 
20718 /// Determines the LogBase2 value for a non-null input value using the
20719 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
20720 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
20721   EVT VT = V.getValueType();
20722   unsigned EltBits = VT.getScalarSizeInBits();
20723   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
20724   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
20725   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
20726   return LogBase2;
20727 }
20728 
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
///   F(X) = A X - 1 [which has a zero at X = 1/A]
///     =>
///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
///     does not require additional intermediate precision]
/// For the last iteration, put numerator N into it to gain more precision:
///   Result = N X_i + X_i (N - N A X_i)
SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
                                      SDNodeFlags Flags) {
  // Don't introduce new nodes once the whole DAG has been legalized.
  if (LegalDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getDivRefinementSteps(VT, MF);
  if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
    AddToWorklist(Est.getNode());

    SDLoc DL(Op);
    if (Iterations) {
      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);

      // Newton iterations: Est = Est + Est (N - Arg * Est)
      // If this is the last iteration, also multiply by the numerator.
      for (int i = 0; i < Iterations; ++i) {
        SDValue MulEst = Est;

        // Fold the numerator into the final refinement step (see the formula
        // in the function comment above).
        if (i == Iterations - 1) {
          MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
          AddToWorklist(MulEst.getNode());
        }

        // NewEst = Arg * (N*Est or Est)
        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
        AddToWorklist(NewEst.getNode());

        // NewEst = (N or 1) - Arg * MulEst
        NewEst = DAG.getNode(ISD::FSUB, DL, VT,
                             (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        // Est = MulEst + Est * NewEst
        Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
        AddToWorklist(Est.getNode());
      }
    } else {
      // If no iterations are available, multiply with N.
      Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
      AddToWorklist(Est.getNode());
    }

    return Est;
  }

  return SDValue();
}
20797 
20798 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20799 /// For the reciprocal sqrt, we need to find the zero of the function:
20800 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
20801 ///     =>
20802 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
20803 /// As a result, we precompute A/2 prior to the iteration loop.
20804 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
20805                                          unsigned Iterations,
20806                                          SDNodeFlags Flags, bool Reciprocal) {
20807   EVT VT = Arg.getValueType();
20808   SDLoc DL(Arg);
20809   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
20810 
20811   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
20812   // this entire sequence requires only one FP constant.
20813   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
20814   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
20815 
20816   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
20817   for (unsigned i = 0; i < Iterations; ++i) {
20818     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
20819     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
20820     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
20821     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20822   }
20823 
20824   // If non-reciprocal square root is requested, multiply the result by Arg.
20825   if (!Reciprocal)
20826     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
20827 
20828   return Est;
20829 }
20830 
20831 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20832 /// For the reciprocal sqrt, we need to find the zero of the function:
20833 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
20834 ///     =>
20835 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
20836 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
20837                                          unsigned Iterations,
20838                                          SDNodeFlags Flags, bool Reciprocal) {
20839   EVT VT = Arg.getValueType();
20840   SDLoc DL(Arg);
20841   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
20842   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
20843 
20844   // This routine must enter the loop below to work correctly
20845   // when (Reciprocal == false).
20846   assert(Iterations > 0);
20847 
20848   // Newton iterations for reciprocal square root:
20849   // E = (E * -0.5) * ((A * E) * E + -3.0)
20850   for (unsigned i = 0; i < Iterations; ++i) {
20851     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
20852     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
20853     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
20854 
20855     // When calculating a square root at the last iteration build:
20856     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
20857     // (notice a common subexpression)
20858     SDValue LHS;
20859     if (Reciprocal || (i + 1) < Iterations) {
20860       // RSQRT: LHS = (E * -0.5)
20861       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
20862     } else {
20863       // SQRT: LHS = (A * E) * -0.5
20864       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
20865     }
20866 
20867     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
20868   }
20869 
20870   return Est;
20871 }
20872 
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
///
/// \returns the estimate, or an empty SDValue if estimates are unavailable or
/// disabled for this type or function.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                           bool Reciprocal) {
  // This combine only runs before the DAG has been fully legalized.
  if (LegalDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);

  // The target decides whether its estimate uses the one-constant or the
  // two-constant Newton-Raphson refinement sequence.
  bool UseOneConstNR = false;
  if (SDValue Est =
      TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
                          Reciprocal)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      Est = UseOneConstNR
            ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
            : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);

      if (!Reciprocal) {
        // The estimate is now completely wrong if the input was exactly 0.0 or
        // possibly a denormal. Force the answer to 0.0 for those cases.
        SDLoc DL(Op);
        EVT CCVT = getSetCCResultType(VT);
        ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
        DenormalMode DenormMode = DAG.getDenormalMode(VT);
        if (DenormMode.Input == DenormalMode::IEEE) {
          // This is specifically a check for the handling of denormal inputs,
          // not the result.

          // fabs(X) < SmallestNormal ? 0.0 : Est
          const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
          APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
          SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
          SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
          Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
        } else {
          // Denormal inputs are flushed, so only exact zero needs fixing up:
          // X == 0.0 ? 0.0 : Est
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
          Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
        }
      }
    }
    return Est;
  }

  return SDValue();
}
20939 
20940 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
20941   return buildSqrtEstimateImpl(Op, Flags, true);
20942 }
20943 
20944 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
20945   return buildSqrtEstimateImpl(Op, Flags, false);
20946 }
20947 
/// Return true if there is any possibility that the two addresses overlap.
bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {

  // Memory-relevant properties of a node, gathered up front so that loads,
  // stores, and lifetime markers can be compared uniformly below.
  struct MemUseCharacteristics {
    bool IsVolatile;
    bool IsAtomic;
    SDValue BasePtr;
    int64_t Offset;
    Optional<int64_t> NumBytes; // None when the access size is unknown.
    MachineMemOperand *MMO;
  };

  auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
    if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
      // Only a constant pre-inc/pre-dec adjustment changes the address of
      // the access itself; all other addressing modes contribute offset 0.
      int64_t Offset = 0;
      if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
        Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
                     ? C->getSExtValue()
                     : (LSN->getAddressingMode() == ISD::PRE_DEC)
                           ? -1 * C->getSExtValue()
                           : 0;
      uint64_t Size =
          MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
      return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
              Offset /*base offset*/,
              Optional<int64_t>(Size),
              LSN->getMemOperand()};
    }
    // NOTE(review): cast<> asserts that N is a LifetimeSDNode; any other
    // memory node kind reaching this lambda asserts instead of falling
    // through to the default return below -- presumably callers only pass
    // load, store, or lifetime nodes. Confirm before relying on the default.
    if (const auto *LN = cast<LifetimeSDNode>(N))
      return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
              (LN->hasOffset()) ? LN->getOffset() : 0,
              (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
                                : Optional<int64_t>(),
              (MachineMemOperand *)nullptr};
    // Default.
    return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
            (int64_t)0 /*offset*/,
            Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
  };

  MemUseCharacteristics MUC0 = getCharacteristics(Op0),
                        MUC1 = getCharacteristics(Op1);

  // If they are to the same address, then they must be aliases.
  if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
      MUC0.Offset == MUC1.Offset)
    return true;

  // If they are both volatile then they cannot be reordered.
  if (MUC0.IsVolatile && MUC1.IsVolatile)
    return true;

  // Be conservative about atomics for the moment
  // TODO: This is way overconservative for unordered atomics (see D66309)
  if (MUC0.IsAtomic && MUC1.IsAtomic)
    return true;

  // An access to invariant memory can never alias a store.
  if (MUC0.MMO && MUC1.MMO) {
    if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
        (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
      return false;
  }

  // Try to prove that there is aliasing, or that there is no aliasing. Either
  // way, we can return now. If nothing can be proved, proceed with more tests.
  bool IsAlias;
  if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
                                       DAG, IsAlias))
    return IsAlias;

  // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
  // either are not known.
  if (!MUC0.MMO || !MUC1.MMO)
    return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load /store.
  // NOTE(review): this repeats the invariant/store check performed above
  // before computeAliasing; since both MMOs are non-null here, this copy can
  // never fire and looks redundant.
  if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
      (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
    return false;

  // If we know required SrcValue1 and SrcValue2 have relatively large
  // alignment compared to the size and offset of the access, we may be able
  // to prove they do not alias. This check is conservative for now to catch
  // cases created by splitting vector types.
  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
  unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
  unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
      MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
      *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
    // Both accesses have the same size and the alignment exceeds it, so the
    // offsets within an aligned block fully determine overlap.
    int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
    int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

    // There is no overlap between these relatively aligned accesses of
    // similar size. Return no alias.
    if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
        (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
      return false;
  }

  // Honor an explicit -combiner-global-alias-analysis setting; otherwise
  // defer to the subtarget's preference.
  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif

  if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
    AliasResult AAResult = AA->alias(
        MemoryLocation(MUC0.MMO->getValue(), Overlap0,
                       UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
        MemoryLocation(MUC1.MMO->getValue(), Overlap1,
                       UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}
21077 
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  // TODO: relax aliasing for unordered atomics (see D66309)
  const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Attempt to improve chain by a single step: on success, C is replaced by
  // the next chain to consider (or an empty SDValue when the walk is done);
  // on failure, C must be treated as an alias.
  std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
    switch (C.getOpcode()) {
    case ISD::EntryToken:
      // No need to mark EntryToken.
      C = SDValue();
      return true;
    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for C.
      // TODO: Relax aliasing for unordered atomics (see D66309)
      bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
                      cast<LSBaseSDNode>(C.getNode())->isSimple();
      // Two simple loads never conflict, and a proven-non-aliasing access
      // can be skipped over.
      if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      // Alias, so stop here.
      return false;
    }

    case ISD::CopyFromReg:
      // Always forward past CopyFromReg.
      C = C.getOperand(0);
      return true;

    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END: {
      // We can forward past any lifetime start/end that can be proven not to
      // alias the memory access.
      if (!isAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      return false;
    }
    default:
      // Unknown chain producers are treated conservatively as aliases.
      return false;
    }
  };

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // Don't bother if we've seen Chain before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      // Too deep: give up and fall back to the unimproved original chain.
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    if (Chain.getOpcode() == ISD::TokenFactor) {
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        continue;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      continue;
    }
    // Everything else
    if (ImproveChain(Chain)) {
      // Updated Chain Found, Consider new chain if one exists.
      if (Chain.getNode())
        Chains.push_back(Chain);
      ++Depth;
      continue;
    }
    // No Improved Chain Possible, treat as Alias.
    Aliases.push_back(Chain);
  }
}
21184 
21185 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
21186 /// (aliasing node.)
21187 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
21188   if (OptLevel == CodeGenOpt::None)
21189     return OldChain;
21190 
21191   // Ops for replacing token factor.
21192   SmallVector<SDValue, 8> Aliases;
21193 
21194   // Accumulate all the aliases to this node.
21195   GatherAllAliases(N, OldChain, Aliases);
21196 
21197   // If no operands then chain to entry token.
21198   if (Aliases.size() == 0)
21199     return DAG.getEntryNode();
21200 
21201   // If a single operand then chain to it.  We don't need to revisit it.
21202   if (Aliases.size() == 1)
21203     return Aliases[0];
21204 
21205   // Construct a custom tailored token factor.
21206   return DAG.getTokenFactor(SDLoc(N), Aliases);
21207 }
21208 
namespace {
// Placeholder mapped-value type for the IntervalMap below, which only needs
// interval keys. TODO: Replace with std::monostate when we move to C++17.
struct UnitT { } Unit;

// All UnitT values compare equal, which lets IntervalMap coalesce adjacent
// intervals carrying the (single) Unit value.
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
} // namespace
21215 
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
// I believe this is mainly important because MergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
// before running MergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.

/// Collect the chain of disjoint stores ending at St, give each member an
/// independently improved chain, and tie them together with one TokenFactor.
/// \returns true if any rewrite was performed.
bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
  SmallVector<StoreSDNode *, 8> ChainedStores;
  StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, every store writes to the immediately preceding address
  // and is thus merged with the previous interval at insertion time.

  using IMap =
      llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
  IMap::Allocator A;
  IMap Intervals(A);

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // BaseIndexOffset assumes that offsets are fixed-size, which
  // is not valid for scalable vectors where the offsets are
  // scaled by `vscale`, so bail out early.
  if (St->getMemoryVT().isScalableVector())
    return false;

  // Add ST's interval.
  Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);

  // Walk backwards along the chain, collecting stores that are provably
  // disjoint from everything gathered so far.
  while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (!SDValue(Chain, 0)->hasOneUse())
      break;
    // TODO: Relax for unordered atomics (see D66309)
    if (!Chain->isSimple() || Chain->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
    // Check that the base pointer is the same as the original one.
    int64_t Offset;
    if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
      break;
    int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
    // Make sure we don't overlap with other intervals by checking the ones to
    // the left or right before inserting.
    auto I = Intervals.find(Offset);
    // If there's a next interval, we should end before it.
    if (I != Intervals.end() && I.start() < (Offset + Length))
      break;
    // If there's a previous interval, we should start after it.
    // NOTE(review): given IntervalMap::find semantics, the previous interval
    // always satisfies stop() <= Offset, so this condition looks inverted
    // relative to its own comment (an overlap test would be stop() > Offset)
    // and appears to bail out whenever any non-adjacent earlier interval
    // exists. At worst this is over-conservative (a missed optimization, not
    // a miscompile) -- confirm intent before changing.
    if (I != Intervals.begin() && (--I).stop() <= Offset)
      break;
    Intervals.insert(Offset, Offset + Length, Unit);

    ChainedStores.push_back(Chain);
    STChain = Chain;
  }

  // If we didn't find a chained store, exit.
  if (ChainedStores.size() == 0)
    return false;

  // Improve all chained stores (St and ChainedStores members) starting from
  // where the store chain ended and return single TokenFactor.
  SDValue NewChain = STChain->getChain();
  SmallVector<SDValue, 8> TFOps;
  for (unsigned I = ChainedStores.size(); I;) {
    StoreSDNode *S = ChainedStores[--I];
    SDValue BetterChain = FindBetterChain(S, NewChain);
    // UpdateNodeOperands may CSE to an existing node, so track the
    // replacement store.
    S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
        S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
    TFOps.push_back(SDValue(S, 0));
    ChainedStores[I] = S;
  }

  // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
  SDValue BetterChain = FindBetterChain(St, NewChain);
  SDValue NewST;
  if (St->isTruncatingStore())
    NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
                              St->getBasePtr(), St->getMemoryVT(),
                              St->getMemOperand());
  else
    NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
                         St->getBasePtr(), St->getMemOperand());

  TFOps.push_back(NewST);

  // If we improved every element of TFOps, then we've lost the dependence on
  // NewChain to successors of St and we need to add it back to TFOps. Do so at
  // the beginning to keep relative order consistent with FindBetterChains.
  auto hasImprovedChain = [&](SDValue ST) -> bool {
    return ST->getOperand(0) != NewChain;
  };
  bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
  if (AddNewChain)
    TFOps.insert(TFOps.begin(), NewChain);

  SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
  CombineTo(St, TF);

  // Add TF and its operands to the worklist.
  AddToWorklist(TF.getNode());
  for (const SDValue &Op : TF->ops())
    AddToWorklist(Op.getNode());
  AddToWorklist(STChain);
  return true;
}
21342 
21343 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
21344   if (OptLevel == CodeGenOpt::None)
21345     return false;
21346 
21347   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
21348 
21349   // We must have a base and an offset.
21350   if (!BasePtr.getBase().getNode())
21351     return false;
21352 
21353   // Do not handle stores to undef base pointers.
21354   if (BasePtr.getBase().isUndef())
21355     return false;
21356 
21357   // Directly improve a chain of disjoint stores starting at St.
21358   if (parallelizeChainedStores(St))
21359     return true;
21360 
21361   // Improve St's Chain..
21362   SDValue BetterChain = FindBetterChain(St, St->getChain());
21363   if (St->getChain() != BetterChain) {
21364     replaceStoreChain(St, BetterChain);
21365     return true;
21366   }
21367   return false;
21368 }
21369 
21370 /// This is the entry point for the file.
21371 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
21372                            CodeGenOpt::Level OptLevel) {
21373   /// This is the main entry point to this class.
21374   DAGCombiner(*this, AA, OptLevel).Run(Level);
21375 }
21376