1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the RISCV target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "RISCVISelDAGToDAG.h"
14 #include "MCTargetDesc/RISCVMCTargetDesc.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCVISelLowering.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/IR/IntrinsicsRISCV.h"
20 #include "llvm/Support/Alignment.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/KnownBits.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "riscv-isel"
29 
30 namespace llvm {
31 namespace RISCV {
32 #define GET_RISCVVSSEGTable_IMPL
33 #define GET_RISCVVLSEGTable_IMPL
34 #define GET_RISCVVLXSEGTable_IMPL
35 #define GET_RISCVVSXSEGTable_IMPL
36 #define GET_RISCVVLETable_IMPL
37 #define GET_RISCVVSETable_IMPL
38 #define GET_RISCVVLXTable_IMPL
39 #define GET_RISCVVSXTable_IMPL
40 #define GET_RISCVMaskedPseudosTable_IMPL
41 #include "RISCVGenSearchableTables.inc"
42 } // namespace RISCV
43 } // namespace llvm
44 
// Pre-isel DAG rewrites: expand SPLAT_VECTOR into target splat nodes and
// lower SPLAT_VECTOR_SPLIT_I64_VL (an i64 splat built from two i32 halves,
// used when i64 is not a legal scalar type) into two stack stores plus a
// stride-0 vector load.
void RISCVDAGToDAGISel::PreprocessISelDAG() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
                                       E = CurDAG->allnodes_end();
       I != E;) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
    // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
    if (N->getOpcode() == ISD::SPLAT_VECTOR) {
      MVT VT = N->getSimpleValueType(0);
      unsigned Opc =
          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
      SDLoc DL(N);
      // X0 as the VL operand requests VLMAX -- NOTE(review): backend
      // convention for VL operands; confirm against the VL pseudo defs.
      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      // Passthru is undef: the whole destination is overwritten by the splat.
      SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
                                       N->getOperand(0), VL);

      // Back the iterator up before RAUW so it stays valid, then advance past
      // N again before deleting it (same trick as below).
      --I;
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      ++I;
      CurDAG->DeleteNode(N);
      continue;
    }

    // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
    // load. Done after lowering and combining so that we have a chance to
    // optimize this to VMV_V_X_VL when the upper bits aren't needed.
    if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL)
      continue;

    // Operands: (Passthru, Lo, Hi, VL).
    assert(N->getNumOperands() == 4 && "Unexpected number of operands");
    MVT VT = N->getSimpleValueType(0);
    SDValue Passthru = N->getOperand(0);
    SDValue Lo = N->getOperand(1);
    SDValue Hi = N->getOperand(2);
    SDValue VL = N->getOperand(3);
    assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
           Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
           "Unexpected VTs!");
    MachineFunction &MF = CurDAG->getMachineFunction();
    RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
    SDLoc DL(N);

    // We use the same frame index we use for moving two i32s into 64-bit FPR.
    // This is an analogous operation.
    int FI = FuncInfo->getMoveF64FrameIndex(MF);
    MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
    const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
    SDValue StackSlot =
        CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));

    // Both stores hang off the entry node (they are independent of each
    // other) and are joined with a TokenFactor below.
    SDValue Chain = CurDAG->getEntryNode();
    Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));

    // The high half lives 4 bytes above the low half.
    SDValue OffsetSlot =
        CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
    Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
                          Align(8));

    Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);

    // Reload the value as a stride-0 strided load (vlse): every element reads
    // the same 8 bytes, producing the splat. X0 as the stride operand reads
    // as zero.
    SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
    SDValue IntID =
        CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
    SDValue Ops[] = {Chain,
                     IntID,
                     Passthru,
                     StackSlot,
                     CurDAG->getRegister(RISCV::X0, MVT::i64),
                     VL};

    SDValue Result = CurDAG->getMemIntrinsicNode(
        ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8),
        MachineMemOperand::MOLoad);

    // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the
    // vlse we created.  This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead.  Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}
133 
134 void RISCVDAGToDAGISel::PostprocessISelDAG() {
135   HandleSDNode Dummy(CurDAG->getRoot());
136   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
137 
138   bool MadeChange = false;
139   while (Position != CurDAG->allnodes_begin()) {
140     SDNode *N = &*--Position;
141     // Skip dead nodes and any non-machine opcodes.
142     if (N->use_empty() || !N->isMachineOpcode())
143       continue;
144 
145     MadeChange |= doPeepholeSExtW(N);
146     MadeChange |= doPeepholeLoadStoreADDI(N);
147     MadeChange |= doPeepholeMaskedRVV(N);
148   }
149 
150   CurDAG->setRoot(Dummy.getValue());
151 
152   if (MadeChange)
153     CurDAG->RemoveDeadNodes();
154 }
155 
156 static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL,
157                                          const MVT VT, int64_t Imm,
158                                          const RISCVSubtarget &Subtarget) {
159   assert(VT == MVT::i64 && "Expecting MVT::i64");
160   const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
161   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool(
162       ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT));
163   SDValue Addr = TLI->getAddr(CP, *CurDAG);
164   SDValue Offset = CurDAG->getTargetConstant(0, DL, VT);
165   // Since there is no data race, the chain can be the entry node.
166   SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset,
167                                         CurDAG->getEntryNode());
168   MachineFunction &MF = CurDAG->getMachineFunction();
169   MachineMemOperand *MemOp = MF.getMachineMemOperand(
170       MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
171       LLT(VT), CP->getAlign());
172   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp});
173   return Load;
174 }
175 
176 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
177                          int64_t Imm, const RISCVSubtarget &Subtarget) {
178   MVT XLenVT = Subtarget.getXLenVT();
179   RISCVMatInt::InstSeq Seq =
180       RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
181 
182   // If Imm is expensive to build, then we put it into constant pool.
183   if (Subtarget.useConstantPoolForLargeInts() &&
184       Seq.size() > Subtarget.getMaxBuildIntsCost())
185     return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget);
186 
187   SDNode *Result = nullptr;
188   SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
189   for (RISCVMatInt::Inst &Inst : Seq) {
190     SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
191     switch (Inst.getOpndKind()) {
192     case RISCVMatInt::Imm:
193       Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SDImm);
194       break;
195     case RISCVMatInt::RegX0:
196       Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg,
197                                       CurDAG->getRegister(RISCV::X0, XLenVT));
198       break;
199     case RISCVMatInt::RegReg:
200       Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg);
201       break;
202     case RISCVMatInt::RegImm:
203       Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
204       break;
205     }
206 
207     // Only the first instruction has X0 as its source.
208     SrcReg = SDValue(Result, 0);
209   }
210 
211   return Result;
212 }
213 
214 static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
215                                unsigned RegClassID, unsigned SubReg0) {
216   assert(Regs.size() >= 2 && Regs.size() <= 8);
217 
218   SDLoc DL(Regs[0]);
219   SmallVector<SDValue, 8> Ops;
220 
221   Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
222 
223   for (unsigned I = 0; I < Regs.size(); ++I) {
224     Ops.push_back(Regs[I]);
225     Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
226   }
227   SDNode *N =
228       CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
229   return SDValue(N, 0);
230 }
231 
232 static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
233                              unsigned NF) {
234   static const unsigned RegClassIDs[] = {
235       RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
236       RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
237       RISCV::VRN8M1RegClassID};
238 
239   return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0);
240 }
241 
242 static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
243                              unsigned NF) {
244   static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID,
245                                          RISCV::VRN3M2RegClassID,
246                                          RISCV::VRN4M2RegClassID};
247 
248   return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0);
249 }
250 
251 static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
252                              unsigned NF) {
253   return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID,
254                          RISCV::sub_vrm4_0);
255 }
256 
257 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
258                            unsigned NF, RISCVII::VLMUL LMUL) {
259   switch (LMUL) {
260   default:
261     llvm_unreachable("Invalid LMUL.");
262   case RISCVII::VLMUL::LMUL_F8:
263   case RISCVII::VLMUL::LMUL_F4:
264   case RISCVII::VLMUL::LMUL_F2:
265   case RISCVII::VLMUL::LMUL_1:
266     return createM1Tuple(CurDAG, Regs, NF);
267   case RISCVII::VLMUL::LMUL_2:
268     return createM2Tuple(CurDAG, Regs, NF);
269   case RISCVII::VLMUL::LMUL_4:
270     return createM4Tuple(CurDAG, Regs, NF);
271   }
272 }
273 
// Append the operand tail shared by the vector load/store pseudos to
// \p Operands: base address, optional stride/index, optional mask (first
// copied into V0, with glue tying the copy to the pseudo), VL, SEW (as log2),
// an optional tail-policy immediate for masked loads, the chain, and the
// glue. \p CurOp is the index on \p Node of the first of these source
// operands and is advanced locally as operands are consumed. If \p IndexVT
// is non-null it receives the type of the stride/index operand.
void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
    bool IsLoad, MVT *IndexVT) {
  SDValue Chain = Node->getOperand(0);
  SDValue Glue;

  // Let address selection fold what it can into the base pointer.
  SDValue Base;
  SelectBaseAddr(Node->getOperand(CurOp++), Base);
  Operands.push_back(Base); // Base pointer.

  if (IsStridedOrIndexed) {
    Operands.push_back(Node->getOperand(CurOp++)); // Index.
    if (IndexVT)
      *IndexVT = Operands.back()->getSimpleValueType(0);
  }

  if (IsMasked) {
    // Mask needs to be copied to V0.
    SDValue Mask = Node->getOperand(CurOp++);
    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
    Glue = Chain.getValue(1);
    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
  }
  SDValue VL;
  selectVLOp(Node->getOperand(CurOp++), VL);
  Operands.push_back(VL);

  MVT XLenVT = Subtarget->getXLenVT();
  SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
  Operands.push_back(SEWOp);

  // Masked load has the tail policy argument.
  if (IsMasked && IsLoad) {
    // Policy must be a constant.
    uint64_t Policy = Node->getConstantOperandVal(CurOp++);
    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
    Operands.push_back(PolicyOp);
  }

  Operands.push_back(Chain); // Chain.
  // Glue exists only when a mask copy to V0 was emitted above.
  if (Glue)
    Operands.push_back(Glue);
}
318 
319 static bool isAllUndef(ArrayRef<SDValue> Values) {
320   return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); });
321 }
322 
// Select a (possibly masked and/or strided) segment-load intrinsic into a
// VLSEG pseudo. Results 0..NF-1 of \p Node are the segment fields; result NF
// is the chain.
void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  // One result value per field, plus the chain.
  unsigned NF = Node->getNumValues() - 1;
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  // Operands 0 and 1 are the chain and intrinsic id.
  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  // The NF passthru (merge) operands follow the intrinsic id.
  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  // Tail-undisturbed pseudo is needed when masked or when any passthru value
  // is live (not undef).
  bool IsTU = IsMasked || !isAllUndef(Regs);
  if (IsTU) {
    // Combine the passthru registers into a single tuple operand.
    SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(Merge);
  }
  CurOp += NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands, /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
                            static_cast<unsigned>(LMUL));
  // The pseudo yields the whole tuple as one MVT::Untyped super register.
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  // Transfer the intrinsic's memory operand to the machine node, if present.
  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  // Split the super register back into the NF per-field results.
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  // Chain result.
  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}
365 
// Select a fault-only-first segment-load intrinsic into a VLSEG<N>FF pseudo.
// Results 0..NF-1 are the segment fields, result NF is the updated VL (read
// back via PseudoReadVL), and result NF+1 is the chain.
void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
  MVT VT = Node->getSimpleValueType(0);
  MVT XLenVT = Subtarget->getXLenVT();
  unsigned SEW = VT.getScalarSizeInBits();
  unsigned Log2SEW = Log2_32(SEW);
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  // Operands 0 and 1 are the chain and intrinsic id.
  unsigned CurOp = 2;
  SmallVector<SDValue, 7> Operands;

  // The NF passthru (merge) operands follow the intrinsic id.
  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  // Tail-undisturbed pseudo is needed when masked or when any passthru value
  // is live (not undef).
  bool IsTU = IsMasked || !isAllUndef(Regs);
  if (IsTU) {
    SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(MaskedOff);
  }
  CurOp += NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ false, Operands,
                             /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
                            Log2SEW, static_cast<unsigned>(LMUL));
  // Extra MVT::Glue result ties the PseudoReadVL below directly to this load.
  MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
                                               MVT::Other, MVT::Glue, Operands);
  // Unmasked form defaults to tail agnostic / mask undisturbed; the masked
  // form carries an explicit policy as its last operand.
  bool TailAgnostic = true;
  bool MaskAgnostic = false;
  if (IsMasked) {
    uint64_t Policy = Node->getConstantOperandVal(Node->getNumOperands() - 1);
    TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
    MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
  }
  unsigned VType =
      RISCVVType::encodeVTYPE(LMUL, SEW, TailAgnostic, MaskAgnostic);
  SDValue VTypeOp = CurDAG->getTargetConstant(VType, DL, XLenVT);
  // Read the VL produced by the fault-only-first load; the glue operand
  // (result 2 of Load) keeps the read adjacent to the load.
  SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
                                          VTypeOp, /*Glue*/ SDValue(Load, 2));

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  // Split the Untyped super register back into the NF per-field results.
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0));   // VL
  ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); // Chain
  CurDAG->RemoveDeadNode(Node);
}
423 
// Select an indexed (ordered or unordered) segment-load intrinsic into a
// VLXSEG pseudo. Results 0..NF-1 of \p Node are the segment fields; result
// NF is the chain.
void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  // One result value per field, plus the chain.
  unsigned NF = Node->getNumValues() - 1;
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  // Operands 0 and 1 are the chain and intrinsic id.
  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  // The NF passthru (merge) operands follow the intrinsic id.
  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  // Tail-undisturbed pseudo is needed when masked or when any passthru value
  // is live (not undef).
  bool IsTU = IsMasked || !isAllUndef(Regs);
  if (IsTU) {
    SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(MaskedOff);
  }
  CurOp += NF;

  // IndexVT is filled in with the type of the index vector operand.
  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, &IndexVT);

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  // i64 index elements are illegal on RV32.
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  // The pseudo yields the whole tuple as one MVT::Untyped super register.
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  // Split the super register back into the NF per-field results.
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  // Chain result.
  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}
477 
// Select a (possibly masked and/or strided) segment-store intrinsic into a
// VSSEG pseudo.
void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  // Operands: chain, intrinsic id, NF store values, base, [stride], [mask],
  // VL. Subtract the four always-present non-value operands, then one more
  // each for the optional stride and mask.
  unsigned NF = Node->getNumOperands() - 4;
  if (IsStrided)
    NF--;
  if (IsMasked)
    NF--;
  // Type of one field; operand 2 is the first store value.
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
  // Combine the NF store values into a single tuple register operand.
  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);

  SmallVector<SDValue, 8> Operands;
  Operands.push_back(StoreVal);
  unsigned CurOp = 2 + NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands);

  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
  // The store's only result is the chain (Node's value type 0).
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  // Transfer the intrinsic's memory operand to the machine node, if present.
  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}
509 
// Select an indexed (ordered or unordered) segment-store intrinsic into a
// VSXSEG pseudo.
void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  // Operands: chain, intrinsic id, NF store values, base, index, [mask], VL.
  // Subtract the five always-present non-value operands, then one more for
  // the optional mask.
  unsigned NF = Node->getNumOperands() - 5;
  if (IsMasked)
    --NF;
  // Type of one field; operand 2 is the first store value.
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
  // Combine the NF store values into a single tuple register operand.
  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);

  SmallVector<SDValue, 8> Operands;
  Operands.push_back(StoreVal);
  unsigned CurOp = 2 + NF;

  // IndexVT is filled in with the type of the index vector operand.
  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, &IndexVT);

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  // i64 index elements are illegal on RV32.
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  // The store's only result is the chain (Node's value type 0).
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  // Transfer the intrinsic's memory operand to the machine node, if present.
  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}
551 
// Select the riscv_vsetvli / riscv_vsetvlimax intrinsics (and their _opt
// variants) into PseudoVSETVLI, PseudoVSETVLIX0 (AVL = VLMAX), or
// PseudoVSETIVLI (small constant AVL in the immediate form).
void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
  // Without vector support there is nothing to select; leave the node alone.
  if (!Subtarget->hasVInstructions())
    return;

  assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) &&
         "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  // With a chain, operand 0 is the chain and the intrinsic id shifts to 1.
  bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNoOffset = HasChain ? 1 : 0;
  unsigned IntNo = Node->getConstantOperandVal(IntNoOffset);

  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax ||
          IntNo == Intrinsic::riscv_vsetvli_opt ||
          IntNo == Intrinsic::riscv_vsetvlimax_opt) &&
         "Unexpected vsetvli intrinsic");

  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax ||
               IntNo == Intrinsic::riscv_vsetvlimax_opt;
  // Offset of the SEW operand: vsetvlimax has no AVL operand, vsetvli does.
  unsigned Offset = IntNoOffset + (VLMax ? 1 : 2);

  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");

  // Decode SEW and LMUL from the constant operands; only the low 3 bits of
  // each are meaningful.
  unsigned SEW =
      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
      Node->getConstantOperandVal(Offset + 1) & 0x7);

  // The intrinsic does not carry policy bits; encode vtype as tail agnostic,
  // mask undisturbed.
  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
                                            /*MaskAgnostic*/ false);
  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);

  // Result 0 is the new VL; append a chain result when the input had one.
  SmallVector<EVT, 2> VTs = {XLenVT};
  if (HasChain)
    VTs.push_back(MVT::Other);

  SDValue VLOperand;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (VLMax) {
    // X0 source with the X0-dest pseudo requests AVL = VLMAX.
    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
    Opcode = RISCV::PseudoVSETVLIX0;
  } else {
    VLOperand = Node->getOperand(IntNoOffset + 1);

    // A constant AVL that fits in 5 bits can use the immediate form
    // (vsetivli) and avoid materializing the AVL in a register.
    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
      uint64_t AVL = C->getZExtValue();
      if (isUInt<5>(AVL)) {
        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
        SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
        if (HasChain)
          Ops.push_back(Node->getOperand(0));
        ReplaceNode(
            Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
        return;
      }
    }
  }

  // Register-AVL form (PseudoVSETVLI or PseudoVSETVLIX0).
  SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
  if (HasChain)
    Ops.push_back(Node->getOperand(0));

  ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
}
621 
622 void RISCVDAGToDAGISel::Select(SDNode *Node) {
623   // If we have a custom node, we have already selected.
624   if (Node->isMachineOpcode()) {
625     LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
626     Node->setNodeId(-1);
627     return;
628   }
629 
630   // Instruction Selection not handled by the auto-generated tablegen selection
631   // should be handled here.
632   unsigned Opcode = Node->getOpcode();
633   MVT XLenVT = Subtarget->getXLenVT();
634   SDLoc DL(Node);
635   MVT VT = Node->getSimpleValueType(0);
636 
637   switch (Opcode) {
638   case ISD::Constant: {
639     auto *ConstNode = cast<ConstantSDNode>(Node);
640     if (VT == XLenVT && ConstNode->isZero()) {
641       SDValue New =
642           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT);
643       ReplaceNode(Node, New.getNode());
644       return;
645     }
646     int64_t Imm = ConstNode->getSExtValue();
647     // If the upper XLen-16 bits are not used, try to convert this to a simm12
648     // by sign extending bit 15.
649     if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
650         hasAllHUsers(Node))
651       Imm = SignExtend64<16>(Imm);
652     // If the upper 32-bits are not used try to convert this into a simm32 by
653     // sign extending bit 32.
654     if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
655       Imm = SignExtend64<32>(Imm);
656 
657     ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
658     return;
659   }
660   case ISD::FrameIndex: {
661     SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT);
662     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
663     SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
664     ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
665     return;
666   }
667   case ISD::SRL: {
668     // Optimize (srl (and X, C2), C) ->
669     //          (srli (slli X, (XLen-C3), (XLen-C3) + C)
670     // Where C2 is a mask with C3 trailing ones.
671     // Taking into account that the C2 may have had lower bits unset by
672     // SimplifyDemandedBits. This avoids materializing the C2 immediate.
673     // This pattern occurs when type legalizing right shifts for types with
674     // less than XLen bits.
675     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
676     if (!N1C)
677       break;
678     SDValue N0 = Node->getOperand(0);
679     if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
680         !isa<ConstantSDNode>(N0.getOperand(1)))
681       break;
682     unsigned ShAmt = N1C->getZExtValue();
683     uint64_t Mask = N0.getConstantOperandVal(1);
684     Mask |= maskTrailingOnes<uint64_t>(ShAmt);
685     if (!isMask_64(Mask))
686       break;
687     unsigned TrailingOnes = countTrailingOnes(Mask);
688     // 32 trailing ones should use srliw via tablegen pattern.
689     if (TrailingOnes == 32 || ShAmt >= TrailingOnes)
690       break;
691     unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
692     SDNode *SLLI =
693         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
694                                CurDAG->getTargetConstant(LShAmt, DL, VT));
695     SDNode *SRLI = CurDAG->getMachineNode(
696         RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
697         CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
698     ReplaceNode(Node, SRLI);
699     return;
700   }
701   case ISD::SRA: {
702     // Optimize (sra (sext_inreg X, i16), C) ->
703     //          (srai (slli X, (XLen-16), (XLen-16) + C)
704     // And      (sra (sext_inreg X, i8), C) ->
705     //          (srai (slli X, (XLen-8), (XLen-8) + C)
706     // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
707     // This transform matches the code we get without Zbb. The shifts are more
708     // compressible, and this can help expose CSE opportunities in the sdiv by
709     // constant optimization.
710     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
711     if (!N1C)
712       break;
713     SDValue N0 = Node->getOperand(0);
714     if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
715       break;
716     unsigned ShAmt = N1C->getZExtValue();
717     unsigned ExtSize =
718         cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
719     // ExtSize of 32 should use sraiw via tablegen pattern.
720     if (ExtSize >= 32 || ShAmt >= ExtSize)
721       break;
722     unsigned LShAmt = Subtarget->getXLen() - ExtSize;
723     SDNode *SLLI =
724         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
725                                CurDAG->getTargetConstant(LShAmt, DL, VT));
726     SDNode *SRAI = CurDAG->getMachineNode(
727         RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
728         CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
729     ReplaceNode(Node, SRAI);
730     return;
731   }
  case ISD::AND: {
    // Custom select (and (shl/srl X, C2), C1) into a pair of shift
    // instructions (or a single W/Zba/Zbs instruction) so that the mask
    // constant C1 never has to be materialized in a register.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;

    SDValue N0 = Node->getOperand(0);

    bool LeftShift = N0.getOpcode() == ISD::SHL;
    if (!LeftShift && N0.getOpcode() != ISD::SRL)
      break;

    auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!C)
      break;
    uint64_t C2 = C->getZExtValue();
    unsigned XLen = Subtarget->getXLen();
    // A shift amount of 0 or >= XLen leaves nothing for this fold to do.
    if (!C2 || C2 >= XLen)
      break;

    uint64_t C1 = N1C->getZExtValue();

    // Keep track of whether this is a c.andi. If we can't use c.andi, the
    // shift pair might offer more compression opportunities.
    // TODO: We could check for C extension here, but we don't have many lit
    // tests with the C extension enabled so not checking gets better coverage.
    // TODO: What if ANDI faster than shift?
    bool IsCANDI = isInt<6>(N1C->getSExtValue());

    // Clear irrelevant bits in the mask. Bits already known to be zero in the
    // shift result contribute nothing to the AND.
    if (LeftShift)
      C1 &= maskTrailingZeros<uint64_t>(C2);
    else
      C1 &= maskTrailingOnes<uint64_t>(XLen - C2);

    // Some transforms should only be done if the shift has a single use or
    // the AND would become (srli (slli X, 32), 32)
    bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);

    SDValue X = N0.getOperand(0);

    // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
    // with c3 leading zeros.
    if (!LeftShift && isMask_64(C1)) {
      // C3 = number of leading zeros of C1 when viewed as an XLen-bit value.
      uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
      if (C2 < C3) {
        // If the number of leading zeros is C2+32 this can be SRLIW.
        if (C2 + 32 == C3) {
          SDNode *SRLIW =
              CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X,
                                     CurDAG->getTargetConstant(C2, DL, XLenVT));
          ReplaceNode(Node, SRLIW);
          return;
        }

        // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if
        // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
        //
        // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
        // legalized and goes through DAG combine.
        if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
            X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
            cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
          SDNode *SRAIW =
              CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, X.getOperand(0),
                                     CurDAG->getTargetConstant(31, DL, XLenVT));
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0),
              CurDAG->getTargetConstant(C3 - 32, DL, XLenVT));
          ReplaceNode(Node, SRLIW);
          return;
        }

        // (srli (slli x, c3-c2), c3).
        // Skip if we could use (zext.w (sraiw X, C2)).
        bool Skip = Subtarget->hasStdExtZba() && C3 == 32 &&
                    X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
                    cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
        // Also Skip if we can use bexti.
        Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1;
        if (OneUseOrZExtW && !Skip) {
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, XLenVT, X,
              CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
          SDNode *SRLI =
              CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
                                     CurDAG->getTargetConstant(C3, DL, XLenVT));
          ReplaceNode(Node, SRLI);
          return;
        }
      }
    }

    // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
    // shifted by c2 bits with c3 leading zeros.
    if (LeftShift && isShiftedMask_64(C1)) {
      uint64_t C3 = XLen - (64 - countLeadingZeros(C1));

      if (C2 + C3 < XLen &&
          C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
        // Use slli.uw when possible.
        if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
          SDNode *SLLI_UW =
              CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X,
                                     CurDAG->getTargetConstant(C2, DL, XLenVT));
          ReplaceNode(Node, SLLI_UW);
          return;
        }

        // (srli (slli c2+c3), c3)
        if (OneUseOrZExtW && !IsCANDI) {
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, XLenVT, X,
              CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
          SDNode *SRLI =
              CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
                                     CurDAG->getTargetConstant(C3, DL, XLenVT));
          ReplaceNode(Node, SRLI);
          return;
        }
      }
    }

    // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
    // shifted mask with c2 leading zeros and c3 trailing zeros.
    if (!LeftShift && isShiftedMask_64(C1)) {
      uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
      uint64_t C3 = countTrailingZeros(C1);
      if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) {
        unsigned SrliOpc = RISCV::SRLI;
        // If the input is zexti32 we should use SRLIW.
        if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
            X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
          SrliOpc = RISCV::SRLIW;
          // Fold the zext.w into the SRLIW by shifting its input directly.
          X = X.getOperand(0);
        }
        SDNode *SRLI = CurDAG->getMachineNode(
            SrliOpc, DL, XLenVT, X,
            CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
      // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
      if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
          OneUseOrZExtW && !IsCANDI) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, XLenVT, X,
            CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
    // shifted mask with no leading zeros and c3 trailing zeros.
    if (LeftShift && isShiftedMask_64(C1)) {
      uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
      uint64_t C3 = countTrailingZeros(C1);
      if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) {
        SDNode *SRLI = CurDAG->getMachineNode(
            RISCV::SRLI, DL, XLenVT, X,
            CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
      // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
      if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, XLenVT, X,
            CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    // No profitable shift-pair form matched; fall back to normal isel.
    break;
  }
  case ISD::MUL: {
    // Special case for calculating (mul (and X, C2), C1) where the full product
    // fits in XLen bits. We can shift X left by the number of leading zeros in
    // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
    // product has XLen trailing zeros, putting it in the output of MULHU. This
    // can avoid materializing a constant in a register for C2.

    // RHS should be a constant.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C || !N1C->hasOneUse())
      break;

    // LHS should be an AND with constant.
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;

    uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();

    // Constant should be a mask.
    if (!isMask_64(C2))
      break;

    // This should be the only use of the AND unless we will use
    // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND
    // constants.
    if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF))
      break;

    // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this
    // optimization.
    if (isInt<12>(C2) ||
        (C2 == UINT64_C(0xFFFF) &&
         (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) ||
        (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()))
      break;

    // We need to shift left the AND input and C1 by a total of XLen bits.

    // How far left do we need to shift the AND input?
    unsigned XLen = Subtarget->getXLen();
    unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2));

    // The constant gets shifted by the remaining amount unless that would
    // shift bits out.
    uint64_t C1 = N1C->getZExtValue();
    unsigned ConstantShift = XLen - LeadingZeros;
    if (ConstantShift > (XLen - (64 - countLeadingZeros(C1))))
      break;

    uint64_t ShiftedC1 = C1 << ConstantShift;
    // If this is RV32, we need to sign extend the constant.
    if (XLen == 32)
      ShiftedC1 = SignExtend64<32>(ShiftedC1);

    // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
    // MULHU returns the high XLen bits, which hold the desired product because
    // the shifted operands put XLen trailing zeros into the full product.
    SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
                               CurDAG->getTargetConstant(LeadingZeros, DL, VT));
    SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
                                           SDValue(SLLI, 0), SDValue(Imm, 0));
    ReplaceNode(Node, MULHU);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(0);
    switch (IntNo) {
      // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vmsgeu:
    case Intrinsic::riscv_vmsge: {
      // There is no vmsge{u}.vx instruction in the V extension, so expand the
      // intrinsic with a scalar operand into vmslt{u}.vx + vmnand.mm (and
      // vmsgeu with 0 into vmset.m, since it is always true).
      SDValue Src1 = Node->getOperand(1);
      SDValue Src2 = Node->getOperand(2);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
      bool IsCmpUnsignedZero = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns. Only an unsigned compare
      // with 0 falls through, as it needs the vmset expansion below.
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        int64_t CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpUnsignedZero = true;
        }
      }
      MVT Src1VT = Src1.getSimpleValueType();
      unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
      // Pick the pseudo opcodes matching the vector operand's LMUL.
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b)            \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
    VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
    break;
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(3), VL);

      // If vmsgeu with 0 immediate, expand it to vmset.
      if (IsCmpUnsignedZero) {
        ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
        return;
      }

      // Expand to
      // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
          0);
      ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
                                               {Cmp, Cmp, VL, SEW}));
      return;
    }
    case Intrinsic::riscv_vmsgeu_mask:
    case Intrinsic::riscv_vmsge_mask: {
      // Masked variant of the expansion above. Operands:
      // (maskedoff, src1, src2, mask, vl).
      SDValue Src1 = Node->getOperand(2);
      SDValue Src2 = Node->getOperand(3);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
      bool IsCmpUnsignedZero = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns. Only an unsigned compare
      // with 0 falls through, as it needs the vmor expansion below.
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        int64_t CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpUnsignedZero = true;
        }
      }
      MVT Src1VT = Src1.getSimpleValueType();
      unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
          VMOROpcode;
      // Compare pseudos use the LMUL of the compared vector operand.
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b)                         \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
                                 : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
    break;
        CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
        CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
        CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
        CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
        CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
        CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
        CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
#undef CASE_VMSLT_OPCODES
      }
      // Mask operations use the LMUL from the mask type.
      switch (RISCVTargetLowering::getLMUL(VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)                       \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
    VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
    VMOROpcode = RISCV::PseudoVMOR_MM_##suffix;                                \
    break;
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      // Mask-register ops always operate at SEW=8 (log2 == 0).
      SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(5), VL);
      SDValue MaskedOff = Node->getOperand(1);
      SDValue Mask = Node->getOperand(4);

      // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
      if (IsCmpUnsignedZero) {
        // We don't need vmor if the MaskedOff and the Mask are the same
        // value.
        if (Mask == MaskedOff) {
          ReplaceUses(Node, Mask.getNode());
          return;
        }
        ReplaceNode(Node,
                    CurDAG->getMachineNode(VMOROpcode, DL, VT,
                                           {Mask, MaskedOff, VL, MaskSEW}));
        return;
      }

      // If the MaskedOff value and the Mask are the same value use
      // vmslt{u}.vx vt, va, x;  vmandn.mm vd, vd, vt
      // This avoids needing to copy v0 to vd before starting the next sequence.
      if (Mask == MaskedOff) {
        SDValue Cmp = SDValue(
            CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
            0);
        ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
                                                 {Mask, Cmp, VL, MaskSEW}));
        return;
      }

      // Mask needs to be copied to V0. The masked pseudo expects the mask in
      // the fixed register V0, threaded via glue.
      SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                           RISCV::V0, Mask, SDValue());
      SDValue Glue = Chain.getValue(1);
      SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);

      // Otherwise use
      // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
      // The result is mask undisturbed.
      // We use the same instructions to emulate mask agnostic behavior, because
      // the agnostic result can be either undisturbed or all 1.
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
                                 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
          0);
      // vmxor.mm vd, vd, v0 is used to update active value.
      ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
                                               {Cmp, Mask, VL, MaskSEW}));
      return;
    }
    case Intrinsic::riscv_vsetvli_opt:
    case Intrinsic::riscv_vsetvlimax_opt:
      return selectVSETVLI(Node);
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
      // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
      return selectVSETVLI(Node);
    case Intrinsic::riscv_vlseg2:
    case Intrinsic::riscv_vlseg3:
    case Intrinsic::riscv_vlseg4:
    case Intrinsic::riscv_vlseg5:
    case Intrinsic::riscv_vlseg6:
    case Intrinsic::riscv_vlseg7:
    case Intrinsic::riscv_vlseg8: {
      selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg2_mask:
    case Intrinsic::riscv_vlseg3_mask:
    case Intrinsic::riscv_vlseg4_mask:
    case Intrinsic::riscv_vlseg5_mask:
    case Intrinsic::riscv_vlseg6_mask:
    case Intrinsic::riscv_vlseg7_mask:
    case Intrinsic::riscv_vlseg8_mask: {
      selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlsseg2:
    case Intrinsic::riscv_vlsseg3:
    case Intrinsic::riscv_vlsseg4:
    case Intrinsic::riscv_vlsseg5:
    case Intrinsic::riscv_vlsseg6:
    case Intrinsic::riscv_vlsseg7:
    case Intrinsic::riscv_vlsseg8: {
      selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vlsseg2_mask:
    case Intrinsic::riscv_vlsseg3_mask:
    case Intrinsic::riscv_vlsseg4_mask:
    case Intrinsic::riscv_vlsseg5_mask:
    case Intrinsic::riscv_vlsseg6_mask:
    case Intrinsic::riscv_vlsseg7_mask:
    case Intrinsic::riscv_vlsseg8_mask: {
      selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vloxseg2:
    case Intrinsic::riscv_vloxseg3:
    case Intrinsic::riscv_vloxseg4:
    case Intrinsic::riscv_vloxseg5:
    case Intrinsic::riscv_vloxseg6:
    case Intrinsic::riscv_vloxseg7:
    case Intrinsic::riscv_vloxseg8:
      selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2:
    case Intrinsic::riscv_vluxseg3:
    case Intrinsic::riscv_vluxseg4:
    case Intrinsic::riscv_vluxseg5:
    case Intrinsic::riscv_vluxseg6:
    case Intrinsic::riscv_vluxseg7:
    case Intrinsic::riscv_vluxseg8:
      selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vloxseg2_mask:
    case Intrinsic::riscv_vloxseg3_mask:
    case Intrinsic::riscv_vloxseg4_mask:
    case Intrinsic::riscv_vloxseg5_mask:
    case Intrinsic::riscv_vloxseg6_mask:
    case Intrinsic::riscv_vloxseg7_mask:
    case Intrinsic::riscv_vloxseg8_mask:
      selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2_mask:
    case Intrinsic::riscv_vluxseg3_mask:
    case Intrinsic::riscv_vluxseg4_mask:
    case Intrinsic::riscv_vluxseg5_mask:
    case Intrinsic::riscv_vluxseg6_mask:
    case Intrinsic::riscv_vluxseg7_mask:
    case Intrinsic::riscv_vluxseg8_mask:
      selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vlseg8ff:
    case Intrinsic::riscv_vlseg7ff:
    case Intrinsic::riscv_vlseg6ff:
    case Intrinsic::riscv_vlseg5ff:
    case Intrinsic::riscv_vlseg4ff:
    case Intrinsic::riscv_vlseg3ff:
    case Intrinsic::riscv_vlseg2ff: {
      selectVLSEGFF(Node, /*IsMasked*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg8ff_mask:
    case Intrinsic::riscv_vlseg7ff_mask:
    case Intrinsic::riscv_vlseg6ff_mask:
    case Intrinsic::riscv_vlseg5ff_mask:
    case Intrinsic::riscv_vlseg4ff_mask:
    case Intrinsic::riscv_vlseg3ff_mask:
    case Intrinsic::riscv_vlseg2ff_mask: {
      selectVLSEGFF(Node, /*IsMasked*/ true);
      return;
    }
    case Intrinsic::riscv_vloxei:
    case Intrinsic::riscv_vloxei_mask:
    case Intrinsic::riscv_vluxei:
    case Intrinsic::riscv_vluxei_mask: {
      // Indexed loads: vloxei is ordered, vluxei is unordered.
      bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
                      IntNo == Intrinsic::riscv_vluxei_mask;
      bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
                       IntNo == Intrinsic::riscv_vloxei_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      // Masked intrinsic only have TU version pseudo instructions.
      bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
      SmallVector<SDValue, 8> Operands;
      if (IsTU)
        Operands.push_back(Node->getOperand(CurOp++));
      else
        // Skip the undef passthru operand for nomask TA version pseudo
        CurOp++;

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/true, &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      // Look up the pseudo keyed on data LMUL, index LMUL and index EEW.
      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        report_fatal_error("The V extension does not support EEW=64 for index "
                           "values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
          IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
          static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      // Preserve the memory operand so later passes see the memory access.
      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vlm:
    case Intrinsic::riscv_vle:
    case Intrinsic::riscv_vle_mask:
    case Intrinsic::riscv_vlse:
    case Intrinsic::riscv_vlse_mask: {
      // Unit-stride (vle), strided (vlse) and mask (vlm) loads.
      bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
                      IntNo == Intrinsic::riscv_vlse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      // The riscv_vlm intrinsic is always tail agnostic and has no passthru
      // operand.
      bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
      // Masked intrinsic only have TU version pseudo instructions.
      bool IsTU =
          HasPassthruOperand &&
          ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked);
      SmallVector<SDValue, 8> Operands;
      if (IsTU)
        Operands.push_back(Node->getOperand(CurOp++));
      else if (HasPassthruOperand)
        // Skip the undef passthru operand for nomask TA version pseudo
        CurOp++;

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                                 Operands, /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
                              static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      // Preserve the memory operand so later passes see the memory access.
      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vleff:
    case Intrinsic::riscv_vleff_mask: {
      // Fault-only-first load. Besides the loaded value and the chain it also
      // produces the new VL, which is read back with PseudoReadVL below.
      bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned SEW = VT.getScalarSizeInBits();
      unsigned Log2SEW = Log2_32(SEW);

      unsigned CurOp = 2;
      // Masked intrinsic only have TU version pseudo instructions.
      bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
      SmallVector<SDValue, 7> Operands;
      if (IsTU)
        Operands.push_back(Node->getOperand(CurOp++));
      else
        // Skip the undef passthru operand for nomask TA version pseudo
        CurOp++;

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ false, Operands,
                                 /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
                              Log2SEW, static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0),
                                 MVT::Other, MVT::Glue, Operands);
      bool TailAgnostic = !IsTU;
      bool MaskAgnostic = false;
      if (IsMasked) {
        // The policy operand is the last operand of the masked intrinsic.
        uint64_t Policy =
            Node->getConstantOperandVal(Node->getNumOperands() - 1);
        TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
        MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
      }
      unsigned VType =
          RISCVVType::encodeVTYPE(LMUL, SEW, TailAgnostic, MaskAgnostic);
      SDValue VTypeOp = CurDAG->getTargetConstant(VType, DL, XLenVT);
      // Glue PseudoReadVL to the load so the VL read happens right after it.
      SDNode *ReadVL =
          CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, VTypeOp,
                                 /*Glue*/ SDValue(Load, 2));

      // Preserve the memory operand so later passes see the memory access.
      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
      ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL
      ReplaceUses(SDValue(Node, 2), SDValue(Load, 1));   // Chain
      CurDAG->RemoveDeadNode(Node);
      return;
    }
    }
    break;
  }
1422   case ISD::INTRINSIC_VOID: {
1423     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
1424     switch (IntNo) {
1425     case Intrinsic::riscv_vsseg2:
1426     case Intrinsic::riscv_vsseg3:
1427     case Intrinsic::riscv_vsseg4:
1428     case Intrinsic::riscv_vsseg5:
1429     case Intrinsic::riscv_vsseg6:
1430     case Intrinsic::riscv_vsseg7:
1431     case Intrinsic::riscv_vsseg8: {
1432       selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1433       return;
1434     }
1435     case Intrinsic::riscv_vsseg2_mask:
1436     case Intrinsic::riscv_vsseg3_mask:
1437     case Intrinsic::riscv_vsseg4_mask:
1438     case Intrinsic::riscv_vsseg5_mask:
1439     case Intrinsic::riscv_vsseg6_mask:
1440     case Intrinsic::riscv_vsseg7_mask:
1441     case Intrinsic::riscv_vsseg8_mask: {
1442       selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1443       return;
1444     }
1445     case Intrinsic::riscv_vssseg2:
1446     case Intrinsic::riscv_vssseg3:
1447     case Intrinsic::riscv_vssseg4:
1448     case Intrinsic::riscv_vssseg5:
1449     case Intrinsic::riscv_vssseg6:
1450     case Intrinsic::riscv_vssseg7:
1451     case Intrinsic::riscv_vssseg8: {
1452       selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1453       return;
1454     }
1455     case Intrinsic::riscv_vssseg2_mask:
1456     case Intrinsic::riscv_vssseg3_mask:
1457     case Intrinsic::riscv_vssseg4_mask:
1458     case Intrinsic::riscv_vssseg5_mask:
1459     case Intrinsic::riscv_vssseg6_mask:
1460     case Intrinsic::riscv_vssseg7_mask:
1461     case Intrinsic::riscv_vssseg8_mask: {
1462       selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1463       return;
1464     }
1465     case Intrinsic::riscv_vsoxseg2:
1466     case Intrinsic::riscv_vsoxseg3:
1467     case Intrinsic::riscv_vsoxseg4:
1468     case Intrinsic::riscv_vsoxseg5:
1469     case Intrinsic::riscv_vsoxseg6:
1470     case Intrinsic::riscv_vsoxseg7:
1471     case Intrinsic::riscv_vsoxseg8:
1472       selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1473       return;
1474     case Intrinsic::riscv_vsuxseg2:
1475     case Intrinsic::riscv_vsuxseg3:
1476     case Intrinsic::riscv_vsuxseg4:
1477     case Intrinsic::riscv_vsuxseg5:
1478     case Intrinsic::riscv_vsuxseg6:
1479     case Intrinsic::riscv_vsuxseg7:
1480     case Intrinsic::riscv_vsuxseg8:
1481       selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1482       return;
1483     case Intrinsic::riscv_vsoxseg2_mask:
1484     case Intrinsic::riscv_vsoxseg3_mask:
1485     case Intrinsic::riscv_vsoxseg4_mask:
1486     case Intrinsic::riscv_vsoxseg5_mask:
1487     case Intrinsic::riscv_vsoxseg6_mask:
1488     case Intrinsic::riscv_vsoxseg7_mask:
1489     case Intrinsic::riscv_vsoxseg8_mask:
1490       selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1491       return;
1492     case Intrinsic::riscv_vsuxseg2_mask:
1493     case Intrinsic::riscv_vsuxseg3_mask:
1494     case Intrinsic::riscv_vsuxseg4_mask:
1495     case Intrinsic::riscv_vsuxseg5_mask:
1496     case Intrinsic::riscv_vsuxseg6_mask:
1497     case Intrinsic::riscv_vsuxseg7_mask:
1498     case Intrinsic::riscv_vsuxseg8_mask:
1499       selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1500       return;
1501     case Intrinsic::riscv_vsoxei:
1502     case Intrinsic::riscv_vsoxei_mask:
1503     case Intrinsic::riscv_vsuxei:
1504     case Intrinsic::riscv_vsuxei_mask: {
1505       bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
1506                       IntNo == Intrinsic::riscv_vsuxei_mask;
1507       bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
1508                        IntNo == Intrinsic::riscv_vsoxei_mask;
1509 
1510       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1511       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1512 
1513       unsigned CurOp = 2;
1514       SmallVector<SDValue, 8> Operands;
1515       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1516 
1517       MVT IndexVT;
1518       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1519                                  /*IsStridedOrIndexed*/ true, Operands,
1520                                  /*IsLoad=*/false, &IndexVT);
1521 
1522       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1523              "Element count mismatch");
1524 
1525       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1526       RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1527       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1528       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1529         report_fatal_error("The V extension does not support EEW=64 for index "
1530                            "values when XLEN=32");
1531       }
1532       const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
1533           IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW,
1534           static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
1535       MachineSDNode *Store =
1536           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1537 
1538       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1539         CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1540 
1541       ReplaceNode(Node, Store);
1542       return;
1543     }
1544     case Intrinsic::riscv_vsm:
1545     case Intrinsic::riscv_vse:
1546     case Intrinsic::riscv_vse_mask:
1547     case Intrinsic::riscv_vsse:
1548     case Intrinsic::riscv_vsse_mask: {
1549       bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
1550                       IntNo == Intrinsic::riscv_vsse_mask;
1551       bool IsStrided =
1552           IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
1553 
1554       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1555       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1556 
1557       unsigned CurOp = 2;
1558       SmallVector<SDValue, 8> Operands;
1559       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1560 
1561       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1562                                  Operands);
1563 
1564       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1565       const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
1566           IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
1567       MachineSDNode *Store =
1568           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1569       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1570         CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1571 
1572       ReplaceNode(Node, Store);
1573       return;
1574     }
1575     }
1576     break;
1577   }
1578   case ISD::BITCAST: {
1579     MVT SrcVT = Node->getOperand(0).getSimpleValueType();
1580     // Just drop bitcasts between vectors if both are fixed or both are
1581     // scalable.
1582     if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
1583         (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
1584       ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
1585       CurDAG->RemoveDeadNode(Node);
1586       return;
1587     }
1588     break;
1589   }
1590   case ISD::INSERT_SUBVECTOR: {
1591     SDValue V = Node->getOperand(0);
1592     SDValue SubV = Node->getOperand(1);
1593     SDLoc DL(SubV);
1594     auto Idx = Node->getConstantOperandVal(2);
1595     MVT SubVecVT = SubV.getSimpleValueType();
1596 
1597     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
1598     MVT SubVecContainerVT = SubVecVT;
1599     // Establish the correct scalable-vector types for any fixed-length type.
1600     if (SubVecVT.isFixedLengthVector())
1601       SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
1602     if (VT.isFixedLengthVector())
1603       VT = TLI.getContainerForFixedLengthVector(VT);
1604 
1605     const auto *TRI = Subtarget->getRegisterInfo();
1606     unsigned SubRegIdx;
1607     std::tie(SubRegIdx, Idx) =
1608         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1609             VT, SubVecContainerVT, Idx, TRI);
1610 
1611     // If the Idx hasn't been completely eliminated then this is a subvector
1612     // insert which doesn't naturally align to a vector register. These must
1613     // be handled using instructions to manipulate the vector registers.
1614     if (Idx != 0)
1615       break;
1616 
1617     RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
1618     bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
1619                            SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
1620                            SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
1621     (void)IsSubVecPartReg; // Silence unused variable warning without asserts.
1622     assert((!IsSubVecPartReg || V.isUndef()) &&
1623            "Expecting lowering to have created legal INSERT_SUBVECTORs when "
1624            "the subvector is smaller than a full-sized register");
1625 
1626     // If we haven't set a SubRegIdx, then we must be going between
1627     // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
1628     if (SubRegIdx == RISCV::NoSubRegister) {
1629       unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT);
1630       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
1631                  InRegClassID &&
1632              "Unexpected subvector extraction");
1633       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
1634       SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
1635                                                DL, VT, SubV, RC);
1636       ReplaceNode(Node, NewNode);
1637       return;
1638     }
1639 
1640     SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
1641     ReplaceNode(Node, Insert.getNode());
1642     return;
1643   }
1644   case ISD::EXTRACT_SUBVECTOR: {
1645     SDValue V = Node->getOperand(0);
1646     auto Idx = Node->getConstantOperandVal(1);
1647     MVT InVT = V.getSimpleValueType();
1648     SDLoc DL(V);
1649 
1650     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
1651     MVT SubVecContainerVT = VT;
1652     // Establish the correct scalable-vector types for any fixed-length type.
1653     if (VT.isFixedLengthVector())
1654       SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
1655     if (InVT.isFixedLengthVector())
1656       InVT = TLI.getContainerForFixedLengthVector(InVT);
1657 
1658     const auto *TRI = Subtarget->getRegisterInfo();
1659     unsigned SubRegIdx;
1660     std::tie(SubRegIdx, Idx) =
1661         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1662             InVT, SubVecContainerVT, Idx, TRI);
1663 
1664     // If the Idx hasn't been completely eliminated then this is a subvector
1665     // extract which doesn't naturally align to a vector register. These must
1666     // be handled using instructions to manipulate the vector registers.
1667     if (Idx != 0)
1668       break;
1669 
1670     // If we haven't set a SubRegIdx, then we must be going between
1671     // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
1672     if (SubRegIdx == RISCV::NoSubRegister) {
1673       unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
1674       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
1675                  InRegClassID &&
1676              "Unexpected subvector extraction");
1677       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
1678       SDNode *NewNode =
1679           CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
1680       ReplaceNode(Node, NewNode);
1681       return;
1682     }
1683 
1684     SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
1685     ReplaceNode(Node, Extract.getNode());
1686     return;
1687   }
1688   case ISD::SPLAT_VECTOR:
1689   case RISCVISD::VMV_S_X_VL:
1690   case RISCVISD::VFMV_S_F_VL:
1691   case RISCVISD::VMV_V_X_VL:
1692   case RISCVISD::VFMV_V_F_VL: {
1693     // Try to match splat of a scalar load to a strided load with stride of x0.
1694     bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
1695                         Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
1696     bool HasPassthruOperand = Node->getOpcode() != ISD::SPLAT_VECTOR;
1697     if (HasPassthruOperand && !Node->getOperand(0).isUndef())
1698       break;
1699     SDValue Src = HasPassthruOperand ? Node->getOperand(1) : Node->getOperand(0);
1700     auto *Ld = dyn_cast<LoadSDNode>(Src);
1701     if (!Ld)
1702       break;
1703     EVT MemVT = Ld->getMemoryVT();
1704     // The memory VT should be the same size as the element type.
1705     if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
1706       break;
1707     if (!IsProfitableToFold(Src, Node, Node) ||
1708         !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
1709       break;
1710 
1711     SDValue VL;
1712     if (Node->getOpcode() == ISD::SPLAT_VECTOR)
1713       VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
1714     else if (IsScalarMove) {
1715       // We could deal with more VL if we update the VSETVLI insert pass to
1716       // avoid introducing more VSETVLI.
1717       if (!isOneConstant(Node->getOperand(2)))
1718         break;
1719       selectVLOp(Node->getOperand(2), VL);
1720     } else
1721       selectVLOp(Node->getOperand(2), VL);
1722 
1723     unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1724     SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
1725 
1726     SDValue Operands[] = {Ld->getBasePtr(),
1727                           CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW,
1728                           Ld->getChain()};
1729 
1730     RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1731     const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
1732         /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false,
1733         Log2SEW, static_cast<unsigned>(LMUL));
1734     MachineSDNode *Load =
1735         CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1736 
1737     CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
1738 
1739     ReplaceNode(Node, Load);
1740     return;
1741   }
1742   }
1743 
1744   // Select the default instruction.
1745   SelectCode(Node);
1746 }
1747 
1748 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
1749     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
1750   switch (ConstraintID) {
1751   case InlineAsm::Constraint_m:
1752     // We just support simple memory operands that have a single address
1753     // operand and need no special handling.
1754     OutOps.push_back(Op);
1755     return false;
1756   case InlineAsm::Constraint_A:
1757     OutOps.push_back(Op);
1758     return false;
1759   default:
1760     break;
1761   }
1762 
1763   return true;
1764 }
1765 
1766 bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
1767   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
1768     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
1769     return true;
1770   }
1771   return false;
1772 }
1773 
1774 bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) {
1775   // If this is FrameIndex, select it directly. Otherwise just let it get
1776   // selected to a register independently.
1777   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr))
1778     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
1779   else
1780     Base = Addr;
1781   return true;
1782 }
1783 
1784 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
1785                                         SDValue &ShAmt) {
1786   // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift
1787   // amount. If there is an AND on the shift amount, we can bypass it if it
1788   // doesn't affect any of those bits.
1789   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
1790     const APInt &AndMask = N->getConstantOperandAPInt(1);
1791 
1792     // Since the max shift amount is a power of 2 we can subtract 1 to make a
1793     // mask that covers the bits needed to represent all shift amounts.
1794     assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
1795     APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
1796 
1797     if (ShMask.isSubsetOf(AndMask)) {
1798       ShAmt = N.getOperand(0);
1799       return true;
1800     }
1801 
1802     // SimplifyDemandedBits may have optimized the mask so try restoring any
1803     // bits that are known zero.
1804     KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0));
1805     if (ShMask.isSubsetOf(AndMask | Known.Zero)) {
1806       ShAmt = N.getOperand(0);
1807       return true;
1808     }
1809   } else if (N.getOpcode() == ISD::SUB &&
1810              isa<ConstantSDNode>(N.getOperand(0))) {
1811     uint64_t Imm = N.getConstantOperandVal(0);
1812     // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
1813     // generate a NEG instead of a SUB of a constant.
1814     if (Imm != 0 && Imm % ShiftWidth == 0) {
1815       SDLoc DL(N);
1816       EVT VT = N.getValueType();
1817       SDValue Zero =
1818           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1819       unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
1820       MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
1821                                                   N.getOperand(1));
1822       ShAmt = SDValue(Neg, 0);
1823       return true;
1824     }
1825   }
1826 
1827   ShAmt = N;
1828   return true;
1829 }
1830 
1831 bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) {
1832   if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1833       cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
1834     Val = N.getOperand(0);
1835     return true;
1836   }
1837   MVT VT = N.getSimpleValueType();
1838   if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) {
1839     Val = N;
1840     return true;
1841   }
1842 
1843   return false;
1844 }
1845 
1846 bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
1847   if (N.getOpcode() == ISD::AND) {
1848     auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
1849     if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) {
1850       Val = N.getOperand(0);
1851       return true;
1852     }
1853   }
1854   MVT VT = N.getSimpleValueType();
1855   APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32);
1856   if (CurDAG->MaskedValueIsZero(N, Mask)) {
1857     Val = N;
1858     return true;
1859   }
1860 
1861   return false;
1862 }
1863 
1864 // Return true if all users of this SDNode* only consume the lower \p Bits.
1865 // This can be used to form W instructions for add/sub/mul/shl even when the
1866 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
1867 // SimplifyDemandedBits has made it so some users see a sext_inreg and some
1868 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
1869 // the add/sub/mul/shl to become non-W instructions. By checking the users we
1870 // may be able to use a W instruction and CSE with the other instruction if
1871 // this has happened. We could try to detect that the CSE opportunity exists
1872 // before doing this, but that would be more complicated.
1873 // TODO: Does this need to look through AND/OR/XOR to their users to find more
1874 // opportunities.
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
  // Only the arithmetic/shift/bit-permute opcodes (and constants) listed in
  // the function comment are expected here; anything else is a caller bug.
  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
          Node->getOpcode() == ISD::SRL ||
          Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
          Node->getOpcode() == RISCVISD::GREV ||
          Node->getOpcode() == RISCVISD::GORC ||
          isa<ConstantSDNode>(Node)) &&
         "Unexpected opcode");

  // Conservatively require that EVERY user provably reads no more than the
  // low \p Bits; a single unknown user makes the answer false.
  for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Users of this node should have already been instruction selected
    if (!User->isMachineOpcode())
      return false;

    // TODO: Add more opcodes?
    switch (User->getMachineOpcode()) {
    default:
      // Unknown user: assume it may read any bit.
      return false;
    // *W instructions and single-precision-or-narrower FP moves/converts
    // from a GPR only read the low 32 bits of their register input.
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLI_UW:
    case RISCV::FMV_W_X:
    case RISCV::FCVT_H_W:
    case RISCV::FCVT_H_WU:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_WU:
      if (Bits < 32)
        return false;
      break;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
        return false;
      break;
    case RISCV::ANDI:
      // ANDI reads only up to the mask's highest set bit (the simm12 mask is
      // non-negative here, so its leading zeros bound the bits consumed).
      if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
        return false;
      break;
    case RISCV::SEXT_B:
      // sext.b reads the low 8 bits.
      if (Bits < 8)
        return false;
      break;
    // These read the low 16 bits of their input.
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
      if (Bits < 16)
        return false;
      break;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    // Stores only read the low 8/16/32 bits of the stored VALUE (operand 0);
    // the address operand needs all its bits.
    case RISCV::SB:
      if (UI.getOperandNo() != 0 || Bits < 8)
        return false;
      break;
    case RISCV::SH:
      if (UI.getOperandNo() != 0 || Bits < 16)
        return false;
      break;
    case RISCV::SW:
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    }
  }

  return true;
}
1972 
// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
1975 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
1976   auto *C = dyn_cast<ConstantSDNode>(N);
1977   if (C && isUInt<5>(C->getZExtValue())) {
1978     VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
1979                                    N->getValueType(0));
1980   } else if (C && C->isAllOnesValue()) {
1981     // Treat all ones as VLMax.
1982     VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
1983                                    N->getValueType(0));
1984   } else if (isa<RegisterSDNode>(N) &&
1985              cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
1986     // All our VL operands use an operand that allows GPRNoX0 or an immediate
1987     // as the register class. Convert X0 to a special immediate to pass the
1988     // MachineVerifier. This is recognized specially by the vsetvli insertion
1989     // pass.
1990     VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
1991                                    N->getValueType(0));
1992   } else {
1993     VL = N;
1994   }
1995 
1996   return true;
1997 }
1998 
1999 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
2000   if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
2001     return false;
2002   SplatVal = N.getOperand(1);
2003   return true;
2004 }
2005 
2006 using ValidateFn = bool (*)(int64_t);
2007 
2008 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
2009                                    SelectionDAG &DAG,
2010                                    const RISCVSubtarget &Subtarget,
2011                                    ValidateFn ValidateImm) {
2012   if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
2013       !isa<ConstantSDNode>(N.getOperand(1)))
2014     return false;
2015 
2016   int64_t SplatImm =
2017       cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
2018 
2019   // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
2020   // type is wider than the resulting vector element type: an implicit
2021   // truncation first takes place. Therefore, perform a manual
2022   // truncation/sign-extension in order to ignore any truncated bits and catch
2023   // any zero-extended immediate.
2024   // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
2025   // sign-extending to (XLenVT -1).
2026   MVT XLenVT = Subtarget.getXLenVT();
2027   assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
2028          "Unexpected splat operand type");
2029   MVT EltVT = N.getSimpleValueType().getVectorElementType();
2030   if (EltVT.bitsLT(XLenVT))
2031     SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());
2032 
2033   if (!ValidateImm(SplatImm))
2034     return false;
2035 
2036   SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
2037   return true;
2038 }
2039 
2040 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
2041   return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
2042                                 [](int64_t Imm) { return isInt<5>(Imm); });
2043 }
2044 
2045 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
2046   return selectVSplatSimmHelper(
2047       N, SplatVal, *CurDAG, *Subtarget,
2048       [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
2049 }
2050 
2051 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
2052                                                       SDValue &SplatVal) {
2053   return selectVSplatSimmHelper(
2054       N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
2055         return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
2056       });
2057 }
2058 
2059 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
2060   if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
2061       !isa<ConstantSDNode>(N.getOperand(1)))
2062     return false;
2063 
2064   int64_t SplatImm =
2065       cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
2066 
2067   if (!isUInt<5>(SplatImm))
2068     return false;
2069 
2070   SplatVal =
2071       CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());
2072 
2073   return true;
2074 }
2075 
2076 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
2077                                        SDValue &Imm) {
2078   if (auto *C = dyn_cast<ConstantSDNode>(N)) {
2079     int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
2080 
2081     if (!isInt<5>(ImmVal))
2082       return false;
2083 
2084     Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
2085     return true;
2086   }
2087 
2088   return false;
2089 }
2090 
2091 // Merge an ADDI into the offset of a load/store instruction where possible.
2092 // (load (addi base, off1), off2) -> (load base, off1+off2)
2093 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
2094 // (load (add base, (addi src, off1)), off2)
2095 //    -> (load (add base, src), off1+off2)
2096 // (store val, (add base, (addi src, off1)), off2)
2097 //    -> (store val, (add base, src), off1+off2)
2098 // This is possible when off1+off2 fits a 12-bit immediate.
bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
  int OffsetOpIdx;
  int BaseOpIdx;

  // Only attempt this optimisation for I-type loads and S-type stores.
  switch (N->getMachineOpcode()) {
  default:
    return false;
  // Loads: operands are (base, offset, chain).
  case RISCV::LB:
  case RISCV::LH:
  case RISCV::LW:
  case RISCV::LBU:
  case RISCV::LHU:
  case RISCV::LWU:
  case RISCV::LD:
  case RISCV::FLH:
  case RISCV::FLW:
  case RISCV::FLD:
    BaseOpIdx = 0;
    OffsetOpIdx = 1;
    break;
  // Stores: operands are (value, base, offset, chain).
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SW:
  case RISCV::SD:
  case RISCV::FSH:
  case RISCV::FSW:
  case RISCV::FSD:
    BaseOpIdx = 1;
    OffsetOpIdx = 2;
    break;
  }

  // The memory offset (off2) must be a compile-time constant.
  if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
    return false;

  SDValue Base = N->getOperand(BaseOpIdx);

  // The base must already be a selected machine node (ADD or ADDI below).
  if (!Base.isMachineOpcode())
    return false;

  // There is a ADD between ADDI and load/store. We can only fold ADDI that
  // do not have a FrameIndex operand.
  SDValue Add;
  unsigned AddBaseIdx;
  if (Base.getMachineOpcode() == RISCV::ADD && Base.hasOneUse()) {
    Add = Base;
    SDValue Op0 = Base.getOperand(0);
    SDValue Op1 = Base.getOperand(1);
    if (Op0.isMachineOpcode() && Op0.getMachineOpcode() == RISCV::ADDI &&
        !isa<FrameIndexSDNode>(Op0.getOperand(0)) &&
        isa<ConstantSDNode>(Op0.getOperand(1))) {
      // (add (addi src, off1), b): keep the ADD's other operand (index 1).
      AddBaseIdx = 1;
      Base = Op0;
    } else if (Op1.isMachineOpcode() && Op1.getMachineOpcode() == RISCV::ADDI &&
               !isa<FrameIndexSDNode>(Op1.getOperand(0)) &&
               isa<ConstantSDNode>(Op1.getOperand(1))) {
      // (add a, (addi src, off1)): keep the ADD's other operand (index 0).
      AddBaseIdx = 0;
      Base = Op1;
    } else if (Op1.isMachineOpcode() &&
               Op1.getMachineOpcode() == RISCV::ADDIW &&
               isa<ConstantSDNode>(Op1.getOperand(1)) &&
               Op1.getOperand(0).isMachineOpcode() &&
               Op1.getOperand(0).getMachineOpcode() == RISCV::LUI) {
      // We found an LUI+ADDIW constant materialization. We might be able to
      // fold the ADDIW offset if it could be treated as ADDI.
      // Emulate the constant materialization to see if the result would be
      // a simm32 if ADDI was used instead of ADDIW.

      // First the LUI.
      uint64_t Imm = Op1.getOperand(0).getConstantOperandVal(0);
      Imm <<= 12;
      Imm = SignExtend64<32>(Imm);

      // Then the ADDI.
      uint64_t LoImm = cast<ConstantSDNode>(Op1.getOperand(1))->getSExtValue();
      Imm += LoImm;

      // If the result isn't a simm32, we can't do the optimization.
      if (!isInt<32>(Imm))
        return false;

      AddBaseIdx = 0;
      Base = Op1;
    } else
      return false;
  } else if (Base.getMachineOpcode() == RISCV::ADDI) {
    // If the base is an ADDI, we can merge it in to the load/store.
  } else
    return false;

  // Base is now the ADDI (or ADDIW); operand 1 holds off1, which may be a
  // plain constant, a global-address lo part, or a constant-pool lo part.
  SDValue ImmOperand = Base.getOperand(1);
  uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);

  if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
    // Plain constant: the combined offset must still fit in a simm12.
    int64_t Offset1 = Const->getSExtValue();
    int64_t CombinedOffset = Offset1 + Offset2;
    if (!isInt<12>(CombinedOffset))
      return false;
    ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
                                           ImmOperand.getValueType());
  } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
    // If the off1 in (addi base, off1) is a global variable's address (its
    // low part, really), then we can rely on the alignment of that variable
    // to provide a margin of safety before off1 can overflow the 12 bits.
    // Check if off2 falls within that margin; if so off1+off2 can't overflow.
    const DataLayout &DL = CurDAG->getDataLayout();
    Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
    if (Offset2 != 0 && Alignment <= Offset2)
      return false;
    int64_t Offset1 = GA->getOffset();
    int64_t CombinedOffset = Offset1 + Offset2;
    ImmOperand = CurDAG->getTargetGlobalAddress(
        GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
        CombinedOffset, GA->getTargetFlags());
  } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
    // Ditto.
    Align Alignment = CP->getAlign();
    if (Offset2 != 0 && Alignment <= Offset2)
      return false;
    int64_t Offset1 = CP->getOffset();
    int64_t CombinedOffset = Offset1 + Offset2;
    ImmOperand = CurDAG->getTargetConstantPool(
        CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
        CombinedOffset, CP->getTargetFlags());
  } else {
    return false;
  }

  LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
  LLVM_DEBUG(Base->dump(CurDAG));
  LLVM_DEBUG(dbgs() << "\nN: ");
  LLVM_DEBUG(N->dump(CurDAG));
  LLVM_DEBUG(dbgs() << "\n");

  // Re-point the intermediate ADD (if any) at the ADDI's source so the ADDI
  // drops out of the address computation.
  if (Add)
    Add = SDValue(CurDAG->UpdateNodeOperands(Add.getNode(),
                                             Add.getOperand(AddBaseIdx),
                                             Base.getOperand(0)),
                  0);

  // Modify the offset operand of the load/store.
  if (BaseOpIdx == 0) { // Load
    if (Add)
      N = CurDAG->UpdateNodeOperands(N, Add, ImmOperand, N->getOperand(2));
    else
      N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
                                     N->getOperand(2));
  } else { // Store
    if (Add)
      N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Add, ImmOperand,
                                     N->getOperand(3));
    else
      N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
                                     ImmOperand, N->getOperand(3));
  }

  return true;
}
2258 
2259 // Try to remove sext.w if the input is a W instruction or can be made into
2260 // a W instruction cheaply.
2261 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
2262   // Look for the sext.w pattern, addiw rd, rs1, 0.
2263   if (N->getMachineOpcode() != RISCV::ADDIW ||
2264       !isNullConstant(N->getOperand(1)))
2265     return false;
2266 
2267   SDValue N0 = N->getOperand(0);
2268   if (!N0.isMachineOpcode())
2269     return false;
2270 
2271   switch (N0.getMachineOpcode()) {
2272   default:
2273     break;
2274   case RISCV::ADD:
2275   case RISCV::ADDI:
2276   case RISCV::SUB:
2277   case RISCV::MUL:
2278   case RISCV::SLLI: {
2279     // Convert sext.w+add/sub/mul to their W instructions. This will create
2280     // a new independent instruction. This improves latency.
2281     unsigned Opc;
2282     switch (N0.getMachineOpcode()) {
2283     default:
2284       llvm_unreachable("Unexpected opcode!");
2285     case RISCV::ADD:  Opc = RISCV::ADDW;  break;
2286     case RISCV::ADDI: Opc = RISCV::ADDIW; break;
2287     case RISCV::SUB:  Opc = RISCV::SUBW;  break;
2288     case RISCV::MUL:  Opc = RISCV::MULW;  break;
2289     case RISCV::SLLI: Opc = RISCV::SLLIW; break;
2290     }
2291 
2292     SDValue N00 = N0.getOperand(0);
2293     SDValue N01 = N0.getOperand(1);
2294 
2295     // Shift amount needs to be uimm5.
2296     if (N0.getMachineOpcode() == RISCV::SLLI &&
2297         !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
2298       break;
2299 
2300     SDNode *Result =
2301         CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
2302                                N00, N01);
2303     ReplaceUses(N, Result);
2304     return true;
2305   }
2306   case RISCV::ADDW:
2307   case RISCV::ADDIW:
2308   case RISCV::SUBW:
2309   case RISCV::MULW:
2310   case RISCV::SLLIW:
2311   case RISCV::GREVIW:
2312   case RISCV::GORCIW:
2313     // Result is already sign extended just remove the sext.w.
2314     // NOTE: We only handle the nodes that are selected with hasAllWUsers.
2315     ReplaceUses(N, N0.getNode());
2316     return true;
2317   }
2318 
2319   return false;
2320 }
2321 
// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
//
// Returns true if N was replaced with an unmasked (TA or TU) pseudo.
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
  // Only pseudos listed in the masked-pseudo table have an unmasked
  // counterpart; anything else is left alone.
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;

  // Check that we're using V0 as a mask register.
  if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
      cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
    return false;

  // The glued user defines V0.
  const auto *Glued = N->getGluedNode();

  if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
    return false;

  // Check that we're defining V0 as a mask register.
  if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
      cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
    return false;

  // Check the instruction defining V0; it needs to be a VMSET pseudo.
  SDValue MaskSetter = Glued->getOperand(2);

  // VMSET writes all-ones into a mask register; one variant per mask LMUL.
  const auto IsVMSet = [](unsigned Opc) {
    return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
           Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
           Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
           Opc == RISCV::PseudoVMSET_M_B8;
  };

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode()))
    return false;

  // Retrieve the tail policy operand index, if any.
  Optional<unsigned> TailPolicyOpIdx;
  const RISCVInstrInfo *TII = static_cast<const RISCVInstrInfo *>(
      CurDAG->getSubtarget().getInstrInfo());

  const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());

  // Default to the tail-agnostic (TA) unmasked pseudo; downgraded to TU
  // below when the policy operand and merge operand require it.
  bool IsTA = true;
  if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
    // The last operand of the pseudo is the policy op, but we might have a
    // Glue operand last. We might also have a chain.
    TailPolicyOpIdx = N->getNumOperands() - 1;
    if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue)
      (*TailPolicyOpIdx)--;
    if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other)
      (*TailPolicyOpIdx)--;

    if (!(N->getConstantOperandVal(*TailPolicyOpIdx) &
          RISCVII::TAIL_AGNOSTIC)) {
      // Keep the true-masked instruction when there is no unmasked TU
      // instruction
      if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef())
        return false;
      // We can't use TA if the tie-operand is not IMPLICIT_DEF
      if (!N->getOperand(0).isUndef())
        IsTA = false;
    }
  }

  if (IsTA) {
    uint64_t TSFlags = TII->get(I->UnmaskedPseudo).TSFlags;

    // Check that we're dropping the merge operand, the mask operand, and any
    // policy operand when we transform to this unmasked pseudo.
    assert(!RISCVII::hasMergeOp(TSFlags) && RISCVII::hasDummyMaskOp(TSFlags) &&
           !RISCVII::hasVecPolicyOp(TSFlags) &&
           "Unexpected pseudo to transform to");
    (void)TSFlags;
  } else {
    uint64_t TSFlags = TII->get(I->UnmaskedTUPseudo).TSFlags;

    // Check that we're dropping the mask operand, and any policy operand
    // when we transform to this unmasked tu pseudo.
    assert(RISCVII::hasMergeOp(TSFlags) && RISCVII::hasDummyMaskOp(TSFlags) &&
           !RISCVII::hasVecPolicyOp(TSFlags) &&
           "Unexpected pseudo to transform to");
    (void)TSFlags;
  }

  unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo;
  SmallVector<SDValue, 8> Ops;
  // Skip the merge operand at index 0 if IsTA
  // NOTE: the loop index shadows the table-info pointer `I` above; MaskOpIdx
  // was copied out earlier precisely so this loop doesn't need the pointer.
  for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask, the policy, and the Glue.
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx || I == TailPolicyOpIdx ||
        Op.getValueType() == MVT::Glue)
      continue;
    Ops.push_back(Op);
  }

  // Transitively apply any node glued to our new node.
  if (auto *TGlued = Glued->getGluedNode())
    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));

  SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
  ReplaceUses(N, Result);

  return true;
}
2436 
2437 // This pass converts a legalized DAG into a RISCV-specific DAG, ready
2438 // for instruction scheduling.
2439 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
2440   return new RISCVDAGToDAGISel(TM);
2441 }
2442