1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the RISCV target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "RISCVISelDAGToDAG.h"
14 #include "MCTargetDesc/RISCVMCTargetDesc.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCVISelLowering.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/IR/IntrinsicsRISCV.h"
20 #include "llvm/Support/Alignment.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/KnownBits.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "riscv-isel"
29 
30 namespace llvm {
31 namespace RISCV {
32 #define GET_RISCVVSSEGTable_IMPL
33 #define GET_RISCVVLSEGTable_IMPL
34 #define GET_RISCVVLXSEGTable_IMPL
35 #define GET_RISCVVSXSEGTable_IMPL
36 #define GET_RISCVVLETable_IMPL
37 #define GET_RISCVVSETable_IMPL
38 #define GET_RISCVVLXTable_IMPL
39 #define GET_RISCVVSXTable_IMPL
40 #define GET_RISCVMaskedPseudosTable_IMPL
41 #include "RISCVGenSearchableTables.inc"
42 } // namespace RISCV
43 } // namespace llvm
44 
45 void RISCVDAGToDAGISel::PreprocessISelDAG() {
46   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
47                                        E = CurDAG->allnodes_end();
48        I != E;) {
49     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
50 
51     // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
52     // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
53     if (N->getOpcode() == ISD::SPLAT_VECTOR) {
54       MVT VT = N->getSimpleValueType(0);
55       unsigned Opc =
56           VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
57       SDLoc DL(N);
58       SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
59       SDValue Result = CurDAG->getNode(Opc, DL, VT, N->getOperand(0), VL);
60 
61       --I;
62       CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
63       ++I;
64       CurDAG->DeleteNode(N);
65       continue;
66     }
67 
68     // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
69     // load. Done after lowering and combining so that we have a chance to
70     // optimize this to VMV_V_X_VL when the upper bits aren't needed.
71     if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL)
72       continue;
73 
74     assert(N->getNumOperands() == 3 && "Unexpected number of operands");
75     MVT VT = N->getSimpleValueType(0);
76     SDValue Lo = N->getOperand(0);
77     SDValue Hi = N->getOperand(1);
78     SDValue VL = N->getOperand(2);
79     assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
80            Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
81            "Unexpected VTs!");
82     MachineFunction &MF = CurDAG->getMachineFunction();
83     RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
84     SDLoc DL(N);
85 
86     // We use the same frame index we use for moving two i32s into 64-bit FPR.
87     // This is an analogous operation.
88     int FI = FuncInfo->getMoveF64FrameIndex(MF);
89     MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
90     const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
91     SDValue StackSlot =
92         CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));
93 
94     SDValue Chain = CurDAG->getEntryNode();
95     Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
96 
97     SDValue OffsetSlot =
98         CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
99     Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
100                           Align(8));
101 
102     Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
103 
104     SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
105     SDValue IntID =
106         CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
107     SDValue Ops[] = {Chain,
108                      IntID,
109                      CurDAG->getUNDEF(VT),
110                      StackSlot,
111                      CurDAG->getRegister(RISCV::X0, MVT::i64),
112                      VL};
113 
114     SDValue Result = CurDAG->getMemIntrinsicNode(
115         ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8),
116         MachineMemOperand::MOLoad);
117 
118     // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the
119     // vlse we created.  This will cause general havok on the dag because
120     // anything below the conversion could be folded into other existing nodes.
121     // To avoid invalidating 'I', back it up to the convert node.
122     --I;
123     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
124 
125     // Now that we did that, the node is dead.  Increment the iterator to the
126     // next node to process, then delete N.
127     ++I;
128     CurDAG->DeleteNode(N);
129   }
130 }
131 
132 void RISCVDAGToDAGISel::PostprocessISelDAG() {
133   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
134 
135   bool MadeChange = false;
136   while (Position != CurDAG->allnodes_begin()) {
137     SDNode *N = &*--Position;
138     // Skip dead nodes and any non-machine opcodes.
139     if (N->use_empty() || !N->isMachineOpcode())
140       continue;
141 
142     MadeChange |= doPeepholeSExtW(N);
143     MadeChange |= doPeepholeLoadStoreADDI(N);
144     MadeChange |= doPeepholeMaskedRVV(N);
145   }
146 
147   if (MadeChange)
148     CurDAG->RemoveDeadNodes();
149 }
150 
151 static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL,
152                                          const MVT VT, int64_t Imm,
153                                          const RISCVSubtarget &Subtarget) {
154   assert(VT == MVT::i64 && "Expecting MVT::i64");
155   const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
156   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool(
157       ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT));
158   SDValue Addr = TLI->getAddr(CP, *CurDAG);
159   SDValue Offset = CurDAG->getTargetConstant(0, DL, VT);
160   // Since there is no data race, the chain can be the entry node.
161   SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset,
162                                         CurDAG->getEntryNode());
163   MachineFunction &MF = CurDAG->getMachineFunction();
164   MachineMemOperand *MemOp = MF.getMachineMemOperand(
165       MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
166       LLT(VT), CP->getAlign());
167   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp});
168   return Load;
169 }
170 
171 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
172                          int64_t Imm, const RISCVSubtarget &Subtarget) {
173   MVT XLenVT = Subtarget.getXLenVT();
174   RISCVMatInt::InstSeq Seq =
175       RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
176 
177   // If Imm is expensive to build, then we put it into constant pool.
178   if (Subtarget.useConstantPoolForLargeInts() &&
179       Seq.size() > Subtarget.getMaxBuildIntsCost())
180     return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget);
181 
182   SDNode *Result = nullptr;
183   SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
184   for (RISCVMatInt::Inst &Inst : Seq) {
185     SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
186     if (Inst.Opc == RISCV::LUI)
187       Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm);
188     else if (Inst.Opc == RISCV::ADD_UW)
189       Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg,
190                                       CurDAG->getRegister(RISCV::X0, XLenVT));
191     else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
192              Inst.Opc == RISCV::SH3ADD)
193       Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg);
194     else
195       Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
196 
197     // Only the first instruction has X0 as its source.
198     SrcReg = SDValue(Result, 0);
199   }
200 
201   return Result;
202 }
203 
204 static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
205                                unsigned RegClassID, unsigned SubReg0) {
206   assert(Regs.size() >= 2 && Regs.size() <= 8);
207 
208   SDLoc DL(Regs[0]);
209   SmallVector<SDValue, 8> Ops;
210 
211   Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
212 
213   for (unsigned I = 0; I < Regs.size(); ++I) {
214     Ops.push_back(Regs[I]);
215     Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
216   }
217   SDNode *N =
218       CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
219   return SDValue(N, 0);
220 }
221 
222 static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
223                              unsigned NF) {
224   static const unsigned RegClassIDs[] = {
225       RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
226       RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
227       RISCV::VRN8M1RegClassID};
228 
229   return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0);
230 }
231 
232 static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
233                              unsigned NF) {
234   static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID,
235                                          RISCV::VRN3M2RegClassID,
236                                          RISCV::VRN4M2RegClassID};
237 
238   return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0);
239 }
240 
241 static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
242                              unsigned NF) {
243   return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID,
244                          RISCV::sub_vrm4_0);
245 }
246 
247 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
248                            unsigned NF, RISCVII::VLMUL LMUL) {
249   switch (LMUL) {
250   default:
251     llvm_unreachable("Invalid LMUL.");
252   case RISCVII::VLMUL::LMUL_F8:
253   case RISCVII::VLMUL::LMUL_F4:
254   case RISCVII::VLMUL::LMUL_F2:
255   case RISCVII::VLMUL::LMUL_1:
256     return createM1Tuple(CurDAG, Regs, NF);
257   case RISCVII::VLMUL::LMUL_2:
258     return createM2Tuple(CurDAG, Regs, NF);
259   case RISCVII::VLMUL::LMUL_4:
260     return createM4Tuple(CurDAG, Regs, NF);
261   }
262 }
263 
264 void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
265     SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
266     bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
267     bool IsLoad, MVT *IndexVT) {
268   SDValue Chain = Node->getOperand(0);
269   SDValue Glue;
270 
271   SDValue Base;
272   SelectBaseAddr(Node->getOperand(CurOp++), Base);
273   Operands.push_back(Base); // Base pointer.
274 
275   if (IsStridedOrIndexed) {
276     Operands.push_back(Node->getOperand(CurOp++)); // Index.
277     if (IndexVT)
278       *IndexVT = Operands.back()->getSimpleValueType(0);
279   }
280 
281   if (IsMasked) {
282     // Mask needs to be copied to V0.
283     SDValue Mask = Node->getOperand(CurOp++);
284     Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
285     Glue = Chain.getValue(1);
286     Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
287   }
288   SDValue VL;
289   selectVLOp(Node->getOperand(CurOp++), VL);
290   Operands.push_back(VL);
291 
292   MVT XLenVT = Subtarget->getXLenVT();
293   SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
294   Operands.push_back(SEWOp);
295 
296   // Masked load has the tail policy argument.
297   if (IsMasked && IsLoad) {
298     // Policy must be a constant.
299     uint64_t Policy = Node->getConstantOperandVal(CurOp++);
300     SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
301     Operands.push_back(PolicyOp);
302   }
303 
304   Operands.push_back(Chain); // Chain.
305   if (Glue)
306     Operands.push_back(Glue);
307 }
308 
309 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
310                                     bool IsStrided) {
311   SDLoc DL(Node);
312   unsigned NF = Node->getNumValues() - 1;
313   MVT VT = Node->getSimpleValueType(0);
314   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
315   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
316 
317   unsigned CurOp = 2;
318   SmallVector<SDValue, 8> Operands;
319   if (IsMasked) {
320     SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
321                                  Node->op_begin() + CurOp + NF);
322     SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
323     Operands.push_back(MaskedOff);
324     CurOp += NF;
325   }
326 
327   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
328                              Operands, /*IsLoad=*/true);
329 
330   const RISCV::VLSEGPseudo *P =
331       RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
332                             static_cast<unsigned>(LMUL));
333   MachineSDNode *Load =
334       CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
335 
336   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
337     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
338 
339   SDValue SuperReg = SDValue(Load, 0);
340   for (unsigned I = 0; I < NF; ++I) {
341     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
342     ReplaceUses(SDValue(Node, I),
343                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
344   }
345 
346   ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
347   CurDAG->RemoveDeadNode(Node);
348 }
349 
350 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
351   SDLoc DL(Node);
352   unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
353   MVT VT = Node->getSimpleValueType(0);
354   MVT XLenVT = Subtarget->getXLenVT();
355   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
356   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
357 
358   unsigned CurOp = 2;
359   SmallVector<SDValue, 7> Operands;
360   if (IsMasked) {
361     SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
362                                  Node->op_begin() + CurOp + NF);
363     SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
364     Operands.push_back(MaskedOff);
365     CurOp += NF;
366   }
367 
368   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
369                              /*IsStridedOrIndexed*/ false, Operands,
370                              /*IsLoad=*/true);
371 
372   const RISCV::VLSEGPseudo *P =
373       RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
374                             Log2SEW, static_cast<unsigned>(LMUL));
375   MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
376                                                MVT::Other, MVT::Glue, Operands);
377   SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
378                                           /*Glue*/ SDValue(Load, 2));
379 
380   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
381     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
382 
383   SDValue SuperReg = SDValue(Load, 0);
384   for (unsigned I = 0; I < NF; ++I) {
385     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
386     ReplaceUses(SDValue(Node, I),
387                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
388   }
389 
390   ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0));   // VL
391   ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); // Chain
392   CurDAG->RemoveDeadNode(Node);
393 }
394 
395 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
396                                      bool IsOrdered) {
397   SDLoc DL(Node);
398   unsigned NF = Node->getNumValues() - 1;
399   MVT VT = Node->getSimpleValueType(0);
400   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
401   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
402 
403   unsigned CurOp = 2;
404   SmallVector<SDValue, 8> Operands;
405   if (IsMasked) {
406     SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
407                                  Node->op_begin() + CurOp + NF);
408     SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
409     Operands.push_back(MaskedOff);
410     CurOp += NF;
411   }
412 
413   MVT IndexVT;
414   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
415                              /*IsStridedOrIndexed*/ true, Operands,
416                              /*IsLoad=*/true, &IndexVT);
417 
418   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
419          "Element count mismatch");
420 
421   RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
422   unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
423   if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
424     report_fatal_error("The V extension does not support EEW=64 for index "
425                        "values when XLEN=32");
426   }
427   const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
428       NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
429       static_cast<unsigned>(IndexLMUL));
430   MachineSDNode *Load =
431       CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
432 
433   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
434     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
435 
436   SDValue SuperReg = SDValue(Load, 0);
437   for (unsigned I = 0; I < NF; ++I) {
438     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
439     ReplaceUses(SDValue(Node, I),
440                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
441   }
442 
443   ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
444   CurDAG->RemoveDeadNode(Node);
445 }
446 
447 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
448                                     bool IsStrided) {
449   SDLoc DL(Node);
450   unsigned NF = Node->getNumOperands() - 4;
451   if (IsStrided)
452     NF--;
453   if (IsMasked)
454     NF--;
455   MVT VT = Node->getOperand(2)->getSimpleValueType(0);
456   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
457   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
458   SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
459   SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
460 
461   SmallVector<SDValue, 8> Operands;
462   Operands.push_back(StoreVal);
463   unsigned CurOp = 2 + NF;
464 
465   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
466                              Operands);
467 
468   const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
469       NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
470   MachineSDNode *Store =
471       CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
472 
473   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
474     CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
475 
476   ReplaceNode(Node, Store);
477 }
478 
479 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
480                                      bool IsOrdered) {
481   SDLoc DL(Node);
482   unsigned NF = Node->getNumOperands() - 5;
483   if (IsMasked)
484     --NF;
485   MVT VT = Node->getOperand(2)->getSimpleValueType(0);
486   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
487   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
488   SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
489   SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
490 
491   SmallVector<SDValue, 8> Operands;
492   Operands.push_back(StoreVal);
493   unsigned CurOp = 2 + NF;
494 
495   MVT IndexVT;
496   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
497                              /*IsStridedOrIndexed*/ true, Operands,
498                              /*IsLoad=*/false, &IndexVT);
499 
500   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
501          "Element count mismatch");
502 
503   RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
504   unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
505   if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
506     report_fatal_error("The V extension does not support EEW=64 for index "
507                        "values when XLEN=32");
508   }
509   const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
510       NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
511       static_cast<unsigned>(IndexLMUL));
512   MachineSDNode *Store =
513       CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
514 
515   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
516     CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
517 
518   ReplaceNode(Node, Store);
519 }
520 
521 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
522   if (!Subtarget->hasVInstructions())
523     return;
524 
525   assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
526           Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) &&
527          "Unexpected opcode");
528 
529   SDLoc DL(Node);
530   MVT XLenVT = Subtarget->getXLenVT();
531 
532   bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
533   unsigned IntNoOffset = HasChain ? 1 : 0;
534   unsigned IntNo = Node->getConstantOperandVal(IntNoOffset);
535 
536   assert((IntNo == Intrinsic::riscv_vsetvli ||
537           IntNo == Intrinsic::riscv_vsetvlimax ||
538           IntNo == Intrinsic::riscv_vsetvli_opt ||
539           IntNo == Intrinsic::riscv_vsetvlimax_opt) &&
540          "Unexpected vsetvli intrinsic");
541 
542   bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax ||
543                IntNo == Intrinsic::riscv_vsetvlimax_opt;
544   unsigned Offset = IntNoOffset + (VLMax ? 1 : 2);
545 
546   assert(Node->getNumOperands() == Offset + 2 &&
547          "Unexpected number of operands");
548 
549   unsigned SEW =
550       RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
551   RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
552       Node->getConstantOperandVal(Offset + 1) & 0x7);
553 
554   unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
555                                             /*MaskAgnostic*/ false);
556   SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
557 
558   SmallVector<EVT, 2> VTs = {XLenVT};
559   if (HasChain)
560     VTs.push_back(MVT::Other);
561 
562   SDValue VLOperand;
563   unsigned Opcode = RISCV::PseudoVSETVLI;
564   if (VLMax) {
565     VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
566     Opcode = RISCV::PseudoVSETVLIX0;
567   } else {
568     VLOperand = Node->getOperand(IntNoOffset + 1);
569 
570     if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
571       uint64_t AVL = C->getZExtValue();
572       if (isUInt<5>(AVL)) {
573         SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
574         SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
575         if (HasChain)
576           Ops.push_back(Node->getOperand(0));
577         ReplaceNode(
578             Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
579         return;
580       }
581     }
582   }
583 
584   SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
585   if (HasChain)
586     Ops.push_back(Node->getOperand(0));
587 
588   ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
589 }
590 
591 void RISCVDAGToDAGISel::Select(SDNode *Node) {
592   // If we have a custom node, we have already selected.
593   if (Node->isMachineOpcode()) {
594     LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
595     Node->setNodeId(-1);
596     return;
597   }
598 
599   // Instruction Selection not handled by the auto-generated tablegen selection
600   // should be handled here.
601   unsigned Opcode = Node->getOpcode();
602   MVT XLenVT = Subtarget->getXLenVT();
603   SDLoc DL(Node);
604   MVT VT = Node->getSimpleValueType(0);
605 
606   switch (Opcode) {
607   case ISD::Constant: {
608     auto *ConstNode = cast<ConstantSDNode>(Node);
609     if (VT == XLenVT && ConstNode->isZero()) {
610       SDValue New =
611           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT);
612       ReplaceNode(Node, New.getNode());
613       return;
614     }
615     int64_t Imm = ConstNode->getSExtValue();
616     // If the upper XLen-16 bits are not used, try to convert this to a simm12
617     // by sign extending bit 15.
618     if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) &&
619         hasAllHUsers(Node))
620       Imm = SignExtend64(Imm, 16);
    // If the upper 32 bits are not used, try to convert this into a simm32 by
    // sign extending bit 31.
623     if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
624       Imm = SignExtend64(Imm, 32);
625 
626     ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
627     return;
628   }
629   case ISD::FrameIndex: {
630     SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT);
631     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
632     SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
633     ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
634     return;
635   }
636   case ISD::SRL: {
637     // Optimize (srl (and X, C2), C) ->
    //          (srli (slli X, (XLen-C3)), (XLen-C3) + C)
639     // Where C2 is a mask with C3 trailing ones.
640     // Taking into account that the C2 may have had lower bits unset by
641     // SimplifyDemandedBits. This avoids materializing the C2 immediate.
642     // This pattern occurs when type legalizing right shifts for types with
643     // less than XLen bits.
644     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
645     if (!N1C)
646       break;
647     SDValue N0 = Node->getOperand(0);
648     if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
649         !isa<ConstantSDNode>(N0.getOperand(1)))
650       break;
651     unsigned ShAmt = N1C->getZExtValue();
652     uint64_t Mask = N0.getConstantOperandVal(1);
653     Mask |= maskTrailingOnes<uint64_t>(ShAmt);
654     if (!isMask_64(Mask))
655       break;
656     unsigned TrailingOnes = countTrailingOnes(Mask);
657     // 32 trailing ones should use srliw via tablegen pattern.
658     if (TrailingOnes == 32 || ShAmt >= TrailingOnes)
659       break;
660     unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
661     SDNode *SLLI =
662         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
663                                CurDAG->getTargetConstant(LShAmt, DL, VT));
664     SDNode *SRLI = CurDAG->getMachineNode(
665         RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
666         CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
667     ReplaceNode(Node, SRLI);
668     return;
669   }
670   case ISD::SRA: {
    // Optimize (sra (sext_inreg X, i16), C) ->
    //          (srai (slli X, (XLen-16)), (XLen-16) + C)
    // And      (sra (sext_inreg X, i8), C) ->
    //          (srai (slli X, (XLen-8)), (XLen-8) + C)
675     // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
676     // This transform matches the code we get without Zbb. The shifts are more
677     // compressible, and this can help expose CSE opportunities in the sdiv by
678     // constant optimization.
679     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
680     if (!N1C)
681       break;
682     SDValue N0 = Node->getOperand(0);
683     if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
684       break;
685     unsigned ShAmt = N1C->getZExtValue();
686     unsigned ExtSize =
687         cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
688     // ExtSize of 32 should use sraiw via tablegen pattern.
689     if (ExtSize >= 32 || ShAmt >= ExtSize)
690       break;
691     unsigned LShAmt = Subtarget->getXLen() - ExtSize;
692     SDNode *SLLI =
693         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
694                                CurDAG->getTargetConstant(LShAmt, DL, VT));
695     SDNode *SRAI = CurDAG->getMachineNode(
696         RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
697         CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
698     ReplaceNode(Node, SRAI);
699     return;
700   }
701   case ISD::AND: {
702     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
703     if (!N1C)
704       break;
705 
706     SDValue N0 = Node->getOperand(0);
707 
708     bool LeftShift = N0.getOpcode() == ISD::SHL;
709     if (!LeftShift && N0.getOpcode() != ISD::SRL)
710       break;
711 
712     auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
713     if (!C)
714       break;
715     uint64_t C2 = C->getZExtValue();
716     unsigned XLen = Subtarget->getXLen();
717     if (!C2 || C2 >= XLen)
718       break;
719 
720     uint64_t C1 = N1C->getZExtValue();
721 
722     // Keep track of whether this is an andi.
723     bool IsANDI = isInt<12>(N1C->getSExtValue());
724 
725     // Clear irrelevant bits in the mask.
726     if (LeftShift)
727       C1 &= maskTrailingZeros<uint64_t>(C2);
728     else
729       C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
730 
731     // Some transforms should only be done if the shift has a single use or
732     // the AND would become (srli (slli X, 32), 32)
733     bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
734 
735     SDValue X = N0.getOperand(0);
736 
737     // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
738     // with c3 leading zeros.
739     if (!LeftShift && isMask_64(C1)) {
740       uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
741       if (C2 < C3) {
742         // If the number of leading zeros is C2+32 this can be SRLIW.
743         if (C2 + 32 == C3) {
744           SDNode *SRLIW =
745               CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X,
746                                      CurDAG->getTargetConstant(C2, DL, XLenVT));
747           ReplaceNode(Node, SRLIW);
748           return;
749         }
750 
751         // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if
752         // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
753         //
754         // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
755         // legalized and goes through DAG combine.
756         if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
757             X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
758             cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
759           SDNode *SRAIW =
760               CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, X.getOperand(0),
761                                      CurDAG->getTargetConstant(31, DL, XLenVT));
762           SDNode *SRLIW = CurDAG->getMachineNode(
763               RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0),
764               CurDAG->getTargetConstant(C3 - 32, DL, XLenVT));
765           ReplaceNode(Node, SRLIW);
766           return;
767         }
768 
769         // (srli (slli x, c3-c2), c3).
770         if (OneUseOrZExtW && !IsANDI) {
771           SDNode *SLLI = CurDAG->getMachineNode(
772               RISCV::SLLI, DL, XLenVT, X,
773               CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
774           SDNode *SRLI =
775               CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
776                                      CurDAG->getTargetConstant(C3, DL, XLenVT));
777           ReplaceNode(Node, SRLI);
778           return;
779         }
780       }
781     }
782 
    // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
784     // shifted by c2 bits with c3 leading zeros.
785     if (LeftShift && isShiftedMask_64(C1)) {
786       uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
787 
788       if (C2 + C3 < XLen &&
789           C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
790         // Use slli.uw when possible.
791         if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
792           SDNode *SLLI_UW =
793               CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X,
794                                      CurDAG->getTargetConstant(C2, DL, XLenVT));
795           ReplaceNode(Node, SLLI_UW);
796           return;
797         }
798 
        // (srli (slli x, c2+c3), c3)
800         if (OneUseOrZExtW && !IsANDI) {
801           SDNode *SLLI = CurDAG->getMachineNode(
802               RISCV::SLLI, DL, XLenVT, X,
803               CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
804           SDNode *SRLI =
805               CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
806                                      CurDAG->getTargetConstant(C3, DL, XLenVT));
807           ReplaceNode(Node, SRLI);
808           return;
809         }
810       }
811     }
812 
    // Turn (and (srl x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
814     // shifted mask with c2 leading zeros and c3 trailing zeros.
815     if (!LeftShift && isShiftedMask_64(C1)) {
816       uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
817       uint64_t C3 = countTrailingZeros(C1);
818       if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsANDI) {
819         SDNode *SRLI = CurDAG->getMachineNode(
820             RISCV::SRLI, DL, XLenVT, X,
821             CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
822         SDNode *SLLI =
823             CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
824                                    CurDAG->getTargetConstant(C3, DL, XLenVT));
825         ReplaceNode(Node, SLLI);
826         return;
827       }
828       // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
829       if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
830           OneUseOrZExtW && !IsANDI) {
831         SDNode *SRLIW = CurDAG->getMachineNode(
832             RISCV::SRLIW, DL, XLenVT, X,
833             CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
834         SDNode *SLLI =
835             CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
836                                    CurDAG->getTargetConstant(C3, DL, XLenVT));
837         ReplaceNode(Node, SLLI);
838         return;
839       }
840     }
841 
842     // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
843     // shifted mask with no leading zeros and c3 trailing zeros.
844     if (LeftShift && isShiftedMask_64(C1)) {
845       uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
846       uint64_t C3 = countTrailingZeros(C1);
847       if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsANDI) {
848         SDNode *SRLI = CurDAG->getMachineNode(
849             RISCV::SRLI, DL, XLenVT, X,
850             CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
851         SDNode *SLLI =
852             CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
853                                    CurDAG->getTargetConstant(C3, DL, XLenVT));
854         ReplaceNode(Node, SLLI);
855         return;
856       }
857       // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
858       if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsANDI) {
859         SDNode *SRLIW = CurDAG->getMachineNode(
860             RISCV::SRLIW, DL, XLenVT, X,
861             CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
862         SDNode *SLLI =
863             CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
864                                    CurDAG->getTargetConstant(C3, DL, XLenVT));
865         ReplaceNode(Node, SLLI);
866         return;
867       }
868     }
869 
870     break;
871   }
872   case ISD::MUL: {
873     // Special case for calculating (mul (and X, C2), C1) where the full product
874     // fits in XLen bits. We can shift X left by the number of leading zeros in
875     // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
876     // product has XLen trailing zeros, putting it in the output of MULHU. This
877     // can avoid materializing a constant in a register for C2.
878 
879     // RHS should be a constant.
880     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
881     if (!N1C || !N1C->hasOneUse())
882       break;
883 
884     // LHS should be an AND with constant.
885     SDValue N0 = Node->getOperand(0);
886     if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
887       break;
888 
889     uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
890 
891     // Constant should be a mask.
892     if (!isMask_64(C2))
893       break;
894 
895     // This should be the only use of the AND unless we will use
896     // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND
897     // constants.
898     if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF))
899       break;
900 
901     // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this
902     // optimization.
903     if (isInt<12>(C2) ||
904         (C2 == UINT64_C(0xFFFF) &&
905          (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) ||
906         (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()))
907       break;
908 
909     // We need to shift left the AND input and C1 by a total of XLen bits.
910 
911     // How far left do we need to shift the AND input?
912     unsigned XLen = Subtarget->getXLen();
913     unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2));
914 
915     // The constant gets shifted by the remaining amount unless that would
916     // shift bits out.
917     uint64_t C1 = N1C->getZExtValue();
918     unsigned ConstantShift = XLen - LeadingZeros;
919     if (ConstantShift > (XLen - (64 - countLeadingZeros(C1))))
920       break;
921 
922     uint64_t ShiftedC1 = C1 << ConstantShift;
    // If this is RV32, we need to sign extend the constant.
924     if (XLen == 32)
925       ShiftedC1 = SignExtend64(ShiftedC1, 32);
926 
927     // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
928     SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
929     SDNode *SLLI =
930         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
931                                CurDAG->getTargetConstant(LeadingZeros, DL, VT));
932     SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
933                                            SDValue(SLLI, 0), SDValue(Imm, 0));
934     ReplaceNode(Node, MULHU);
935     return;
936   }
937   case ISD::INTRINSIC_WO_CHAIN: {
938     unsigned IntNo = Node->getConstantOperandVal(0);
939     switch (IntNo) {
940       // By default we do not custom select any intrinsic.
941     default:
942       break;
943     case Intrinsic::riscv_vmsgeu:
944     case Intrinsic::riscv_vmsge: {
945       SDValue Src1 = Node->getOperand(1);
946       SDValue Src2 = Node->getOperand(2);
947       bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
948       bool IsCmpUnsignedZero = false;
949       // Only custom select scalar second operand.
950       if (Src2.getValueType() != XLenVT)
951         break;
952       // Small constants are handled with patterns.
953       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
954         int64_t CVal = C->getSExtValue();
955         if (CVal >= -15 && CVal <= 16) {
956           if (!IsUnsigned || CVal != 0)
957             break;
958           IsCmpUnsignedZero = true;
959         }
960       }
961       MVT Src1VT = Src1.getSimpleValueType();
962       unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
963       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
964       default:
965         llvm_unreachable("Unexpected LMUL!");
966 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b)            \
967   case RISCVII::VLMUL::lmulenum:                                               \
968     VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
969                              : RISCV::PseudoVMSLT_VX_##suffix;                 \
970     VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
971     VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
972     break;
973         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
974         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
975         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
976         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
977         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
978         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
979         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
980 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES
981       }
982       SDValue SEW = CurDAG->getTargetConstant(
983           Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
984       SDValue VL;
985       selectVLOp(Node->getOperand(3), VL);
986 
987       // If vmsgeu with 0 immediate, expand it to vmset.
988       if (IsCmpUnsignedZero) {
989         ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
990         return;
991       }
992 
993       // Expand to
994       // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
995       SDValue Cmp = SDValue(
996           CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
997           0);
998       ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
999                                                {Cmp, Cmp, VL, SEW}));
1000       return;
1001     }
1002     case Intrinsic::riscv_vmsgeu_mask:
1003     case Intrinsic::riscv_vmsge_mask: {
1004       SDValue Src1 = Node->getOperand(2);
1005       SDValue Src2 = Node->getOperand(3);
1006       bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1007       bool IsCmpUnsignedZero = false;
1008       // Only custom select scalar second operand.
1009       if (Src2.getValueType() != XLenVT)
1010         break;
1011       // Small constants are handled with patterns.
1012       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1013         int64_t CVal = C->getSExtValue();
1014         if (CVal >= -15 && CVal <= 16) {
1015           if (!IsUnsigned || CVal != 0)
1016             break;
1017           IsCmpUnsignedZero = true;
1018         }
1019       }
1020       MVT Src1VT = Src1.getSimpleValueType();
1021       unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1022           VMSetOpcode, VMANDOpcode;
1023       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1024       default:
1025         llvm_unreachable("Unexpected LMUL!");
1026 #define CASE_VMSLT_VMSET_OPCODES(lmulenum, suffix, suffix_b)                   \
1027   case RISCVII::VLMUL::lmulenum:                                               \
1028     VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
1029                              : RISCV::PseudoVMSLT_VX_##suffix;                 \
1030     VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
1031                                  : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
1032     VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
1033     break;
1034         CASE_VMSLT_VMSET_OPCODES(LMUL_F8, MF8, B1)
1035         CASE_VMSLT_VMSET_OPCODES(LMUL_F4, MF4, B2)
1036         CASE_VMSLT_VMSET_OPCODES(LMUL_F2, MF2, B4)
1037         CASE_VMSLT_VMSET_OPCODES(LMUL_1, M1, B8)
1038         CASE_VMSLT_VMSET_OPCODES(LMUL_2, M2, B16)
1039         CASE_VMSLT_VMSET_OPCODES(LMUL_4, M4, B32)
1040         CASE_VMSLT_VMSET_OPCODES(LMUL_8, M8, B64)
1041 #undef CASE_VMSLT_VMSET_OPCODES
1042       }
1043       // Mask operations use the LMUL from the mask type.
1044       switch (RISCVTargetLowering::getLMUL(VT)) {
1045       default:
1046         llvm_unreachable("Unexpected LMUL!");
1047 #define CASE_VMXOR_VMANDN_VMAND_OPCODES(lmulenum, suffix)                       \
1048   case RISCVII::VLMUL::lmulenum:                                               \
1049     VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
1050     VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
1051     VMANDOpcode = RISCV::PseudoVMAND_MM_##suffix;                              \
1052     break;
1053         CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F8, MF8)
1054         CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F4, MF4)
1055         CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F2, MF2)
1056         CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_1, M1)
1057         CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_2, M2)
1058         CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_4, M4)
1059         CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_8, M8)
1060 #undef CASE_VMXOR_VMANDN_VMAND_OPCODES
1061       }
1062       SDValue SEW = CurDAG->getTargetConstant(
1063           Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1064       SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1065       SDValue VL;
1066       selectVLOp(Node->getOperand(5), VL);
1067       SDValue MaskedOff = Node->getOperand(1);
1068       SDValue Mask = Node->getOperand(4);
1069 
1070       // If vmsgeu_mask with 0 immediate, expand it to {vmset, vmand}.
1071       if (IsCmpUnsignedZero) {
1072         SDValue VMSet =
1073             SDValue(CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW), 0);
1074         ReplaceNode(Node, CurDAG->getMachineNode(VMANDOpcode, DL, VT,
1075                                                  {Mask, VMSet, VL, MaskSEW}));
1076         return;
1077       }
1078 
1079       // If the MaskedOff value and the Mask are the same value use
1080       // vmslt{u}.vx vt, va, x;  vmandn.mm vd, vd, vt
1081       // This avoids needing to copy v0 to vd before starting the next sequence.
1082       if (Mask == MaskedOff) {
1083         SDValue Cmp = SDValue(
1084             CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1085             0);
1086         ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1087                                                  {Mask, Cmp, VL, MaskSEW}));
1088         return;
1089       }
1090 
1091       // Mask needs to be copied to V0.
1092       SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1093                                            RISCV::V0, Mask, SDValue());
1094       SDValue Glue = Chain.getValue(1);
1095       SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1096 
1097       // Otherwise use
1098       // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1099       SDValue Cmp = SDValue(
1100           CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1101                                  {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1102           0);
1103       ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1104                                                {Cmp, Mask, VL, MaskSEW}));
1105       return;
1106     }
1107     case Intrinsic::riscv_vsetvli_opt:
1108     case Intrinsic::riscv_vsetvlimax_opt:
1109       return selectVSETVLI(Node);
1110     }
1111     break;
1112   }
1113   case ISD::INTRINSIC_W_CHAIN: {
1114     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
1115     switch (IntNo) {
1116       // By default we do not custom select any intrinsic.
1117     default:
1118       break;
1119     case Intrinsic::riscv_vsetvli:
1120     case Intrinsic::riscv_vsetvlimax:
1121       return selectVSETVLI(Node);
1122     case Intrinsic::riscv_vlseg2:
1123     case Intrinsic::riscv_vlseg3:
1124     case Intrinsic::riscv_vlseg4:
1125     case Intrinsic::riscv_vlseg5:
1126     case Intrinsic::riscv_vlseg6:
1127     case Intrinsic::riscv_vlseg7:
1128     case Intrinsic::riscv_vlseg8: {
1129       selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1130       return;
1131     }
1132     case Intrinsic::riscv_vlseg2_mask:
1133     case Intrinsic::riscv_vlseg3_mask:
1134     case Intrinsic::riscv_vlseg4_mask:
1135     case Intrinsic::riscv_vlseg5_mask:
1136     case Intrinsic::riscv_vlseg6_mask:
1137     case Intrinsic::riscv_vlseg7_mask:
1138     case Intrinsic::riscv_vlseg8_mask: {
1139       selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1140       return;
1141     }
1142     case Intrinsic::riscv_vlsseg2:
1143     case Intrinsic::riscv_vlsseg3:
1144     case Intrinsic::riscv_vlsseg4:
1145     case Intrinsic::riscv_vlsseg5:
1146     case Intrinsic::riscv_vlsseg6:
1147     case Intrinsic::riscv_vlsseg7:
1148     case Intrinsic::riscv_vlsseg8: {
1149       selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1150       return;
1151     }
1152     case Intrinsic::riscv_vlsseg2_mask:
1153     case Intrinsic::riscv_vlsseg3_mask:
1154     case Intrinsic::riscv_vlsseg4_mask:
1155     case Intrinsic::riscv_vlsseg5_mask:
1156     case Intrinsic::riscv_vlsseg6_mask:
1157     case Intrinsic::riscv_vlsseg7_mask:
1158     case Intrinsic::riscv_vlsseg8_mask: {
1159       selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1160       return;
1161     }
1162     case Intrinsic::riscv_vloxseg2:
1163     case Intrinsic::riscv_vloxseg3:
1164     case Intrinsic::riscv_vloxseg4:
1165     case Intrinsic::riscv_vloxseg5:
1166     case Intrinsic::riscv_vloxseg6:
1167     case Intrinsic::riscv_vloxseg7:
1168     case Intrinsic::riscv_vloxseg8:
1169       selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1170       return;
1171     case Intrinsic::riscv_vluxseg2:
1172     case Intrinsic::riscv_vluxseg3:
1173     case Intrinsic::riscv_vluxseg4:
1174     case Intrinsic::riscv_vluxseg5:
1175     case Intrinsic::riscv_vluxseg6:
1176     case Intrinsic::riscv_vluxseg7:
1177     case Intrinsic::riscv_vluxseg8:
1178       selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1179       return;
1180     case Intrinsic::riscv_vloxseg2_mask:
1181     case Intrinsic::riscv_vloxseg3_mask:
1182     case Intrinsic::riscv_vloxseg4_mask:
1183     case Intrinsic::riscv_vloxseg5_mask:
1184     case Intrinsic::riscv_vloxseg6_mask:
1185     case Intrinsic::riscv_vloxseg7_mask:
1186     case Intrinsic::riscv_vloxseg8_mask:
1187       selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1188       return;
1189     case Intrinsic::riscv_vluxseg2_mask:
1190     case Intrinsic::riscv_vluxseg3_mask:
1191     case Intrinsic::riscv_vluxseg4_mask:
1192     case Intrinsic::riscv_vluxseg5_mask:
1193     case Intrinsic::riscv_vluxseg6_mask:
1194     case Intrinsic::riscv_vluxseg7_mask:
1195     case Intrinsic::riscv_vluxseg8_mask:
1196       selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1197       return;
1198     case Intrinsic::riscv_vlseg8ff:
1199     case Intrinsic::riscv_vlseg7ff:
1200     case Intrinsic::riscv_vlseg6ff:
1201     case Intrinsic::riscv_vlseg5ff:
1202     case Intrinsic::riscv_vlseg4ff:
1203     case Intrinsic::riscv_vlseg3ff:
1204     case Intrinsic::riscv_vlseg2ff: {
1205       selectVLSEGFF(Node, /*IsMasked*/ false);
1206       return;
1207     }
1208     case Intrinsic::riscv_vlseg8ff_mask:
1209     case Intrinsic::riscv_vlseg7ff_mask:
1210     case Intrinsic::riscv_vlseg6ff_mask:
1211     case Intrinsic::riscv_vlseg5ff_mask:
1212     case Intrinsic::riscv_vlseg4ff_mask:
1213     case Intrinsic::riscv_vlseg3ff_mask:
1214     case Intrinsic::riscv_vlseg2ff_mask: {
1215       selectVLSEGFF(Node, /*IsMasked*/ true);
1216       return;
1217     }
1218     case Intrinsic::riscv_vloxei:
1219     case Intrinsic::riscv_vloxei_mask:
1220     case Intrinsic::riscv_vluxei:
1221     case Intrinsic::riscv_vluxei_mask: {
1222       bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1223                       IntNo == Intrinsic::riscv_vluxei_mask;
1224       bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1225                        IntNo == Intrinsic::riscv_vloxei_mask;
1226 
1227       MVT VT = Node->getSimpleValueType(0);
1228       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1229 
1230       unsigned CurOp = 2;
      // Masked intrinsics only have TU version pseudo instructions.
1232       bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
1233       SmallVector<SDValue, 8> Operands;
1234       if (IsTU)
1235         Operands.push_back(Node->getOperand(CurOp++));
1236       else
1237         // Skip the undef passthru operand for nomask TA version pseudo
1238         CurOp++;
1239 
1240       MVT IndexVT;
1241       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1242                                  /*IsStridedOrIndexed*/ true, Operands,
1243                                  /*IsLoad=*/true, &IndexVT);
1244 
1245       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1246              "Element count mismatch");
1247 
1248       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1249       RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1250       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1251       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1252         report_fatal_error("The V extension does not support EEW=64 for index "
1253                            "values when XLEN=32");
1254       }
1255       const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1256           IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1257           static_cast<unsigned>(IndexLMUL));
1258       MachineSDNode *Load =
1259           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1260 
1261       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1262         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1263 
1264       ReplaceNode(Node, Load);
1265       return;
1266     }
1267     case Intrinsic::riscv_vlm:
1268     case Intrinsic::riscv_vle:
1269     case Intrinsic::riscv_vle_mask:
1270     case Intrinsic::riscv_vlse:
1271     case Intrinsic::riscv_vlse_mask: {
1272       bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1273                       IntNo == Intrinsic::riscv_vlse_mask;
1274       bool IsStrided =
1275           IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1276 
1277       MVT VT = Node->getSimpleValueType(0);
1278       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1279 
1280       unsigned CurOp = 2;
      // The riscv_vlm intrinsic is always tail agnostic and has no passthru
      // operand.
1282       bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
      // Masked intrinsics only have TU version pseudo instructions.
1284       bool IsTU =
1285           HasPassthruOperand &&
1286           ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked);
1287       SmallVector<SDValue, 8> Operands;
1288       if (IsTU)
1289         Operands.push_back(Node->getOperand(CurOp++));
1290       else if (HasPassthruOperand)
1291         // Skip the undef passthru operand for nomask TA version pseudo
1292         CurOp++;
1293 
1294       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1295                                  Operands, /*IsLoad=*/true);
1296 
1297       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1298       const RISCV::VLEPseudo *P =
1299           RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
1300                               static_cast<unsigned>(LMUL));
1301       MachineSDNode *Load =
1302           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1303 
1304       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1305         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1306 
1307       ReplaceNode(Node, Load);
1308       return;
1309     }
1310     case Intrinsic::riscv_vleff:
1311     case Intrinsic::riscv_vleff_mask: {
1312       bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1313 
1314       MVT VT = Node->getSimpleValueType(0);
1315       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1316 
1317       unsigned CurOp = 2;
      // Masked intrinsics only have TU version pseudo instructions.
1319       bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
1320       SmallVector<SDValue, 7> Operands;
1321       if (IsTU)
1322         Operands.push_back(Node->getOperand(CurOp++));
1323       else
1324         // Skip the undef passthru operand for nomask TA version pseudo
1325         CurOp++;
1326 
1327       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1328                                  /*IsStridedOrIndexed*/ false, Operands,
1329                                  /*IsLoad=*/true);
1330 
1331       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1332       const RISCV::VLEPseudo *P =
1333           RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
1334                               Log2SEW, static_cast<unsigned>(LMUL));
1335       MachineSDNode *Load =
1336           CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0),
1337                                  MVT::Other, MVT::Glue, Operands);
1338       SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
1339                                               /*Glue*/ SDValue(Load, 2));
1340 
1341       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1342         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1343 
1344       ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
1345       ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL
1346       ReplaceUses(SDValue(Node, 2), SDValue(Load, 1));   // Chain
1347       CurDAG->RemoveDeadNode(Node);
1348       return;
1349     }
1350     }
1351     break;
1352   }
1353   case ISD::INTRINSIC_VOID: {
1354     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
1355     switch (IntNo) {
1356     case Intrinsic::riscv_vsseg2:
1357     case Intrinsic::riscv_vsseg3:
1358     case Intrinsic::riscv_vsseg4:
1359     case Intrinsic::riscv_vsseg5:
1360     case Intrinsic::riscv_vsseg6:
1361     case Intrinsic::riscv_vsseg7:
1362     case Intrinsic::riscv_vsseg8: {
1363       selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1364       return;
1365     }
1366     case Intrinsic::riscv_vsseg2_mask:
1367     case Intrinsic::riscv_vsseg3_mask:
1368     case Intrinsic::riscv_vsseg4_mask:
1369     case Intrinsic::riscv_vsseg5_mask:
1370     case Intrinsic::riscv_vsseg6_mask:
1371     case Intrinsic::riscv_vsseg7_mask:
1372     case Intrinsic::riscv_vsseg8_mask: {
1373       selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1374       return;
1375     }
1376     case Intrinsic::riscv_vssseg2:
1377     case Intrinsic::riscv_vssseg3:
1378     case Intrinsic::riscv_vssseg4:
1379     case Intrinsic::riscv_vssseg5:
1380     case Intrinsic::riscv_vssseg6:
1381     case Intrinsic::riscv_vssseg7:
1382     case Intrinsic::riscv_vssseg8: {
1383       selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1384       return;
1385     }
1386     case Intrinsic::riscv_vssseg2_mask:
1387     case Intrinsic::riscv_vssseg3_mask:
1388     case Intrinsic::riscv_vssseg4_mask:
1389     case Intrinsic::riscv_vssseg5_mask:
1390     case Intrinsic::riscv_vssseg6_mask:
1391     case Intrinsic::riscv_vssseg7_mask:
1392     case Intrinsic::riscv_vssseg8_mask: {
1393       selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1394       return;
1395     }
1396     case Intrinsic::riscv_vsoxseg2:
1397     case Intrinsic::riscv_vsoxseg3:
1398     case Intrinsic::riscv_vsoxseg4:
1399     case Intrinsic::riscv_vsoxseg5:
1400     case Intrinsic::riscv_vsoxseg6:
1401     case Intrinsic::riscv_vsoxseg7:
1402     case Intrinsic::riscv_vsoxseg8:
1403       selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1404       return;
1405     case Intrinsic::riscv_vsuxseg2:
1406     case Intrinsic::riscv_vsuxseg3:
1407     case Intrinsic::riscv_vsuxseg4:
1408     case Intrinsic::riscv_vsuxseg5:
1409     case Intrinsic::riscv_vsuxseg6:
1410     case Intrinsic::riscv_vsuxseg7:
1411     case Intrinsic::riscv_vsuxseg8:
1412       selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1413       return;
1414     case Intrinsic::riscv_vsoxseg2_mask:
1415     case Intrinsic::riscv_vsoxseg3_mask:
1416     case Intrinsic::riscv_vsoxseg4_mask:
1417     case Intrinsic::riscv_vsoxseg5_mask:
1418     case Intrinsic::riscv_vsoxseg6_mask:
1419     case Intrinsic::riscv_vsoxseg7_mask:
1420     case Intrinsic::riscv_vsoxseg8_mask:
1421       selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1422       return;
1423     case Intrinsic::riscv_vsuxseg2_mask:
1424     case Intrinsic::riscv_vsuxseg3_mask:
1425     case Intrinsic::riscv_vsuxseg4_mask:
1426     case Intrinsic::riscv_vsuxseg5_mask:
1427     case Intrinsic::riscv_vsuxseg6_mask:
1428     case Intrinsic::riscv_vsuxseg7_mask:
1429     case Intrinsic::riscv_vsuxseg8_mask:
1430       selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1431       return;
1432     case Intrinsic::riscv_vsoxei:
1433     case Intrinsic::riscv_vsoxei_mask:
1434     case Intrinsic::riscv_vsuxei:
1435     case Intrinsic::riscv_vsuxei_mask: {
1436       bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
1437                       IntNo == Intrinsic::riscv_vsuxei_mask;
1438       bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
1439                        IntNo == Intrinsic::riscv_vsoxei_mask;
1440 
1441       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1442       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1443 
1444       unsigned CurOp = 2;
1445       SmallVector<SDValue, 8> Operands;
1446       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1447 
1448       MVT IndexVT;
1449       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1450                                  /*IsStridedOrIndexed*/ true, Operands,
1451                                  /*IsLoad=*/false, &IndexVT);
1452 
1453       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1454              "Element count mismatch");
1455 
1456       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1457       RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1458       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1459       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1460         report_fatal_error("The V extension does not support EEW=64 for index "
1461                            "values when XLEN=32");
1462       }
1463       const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
1464           IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW,
1465           static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
1466       MachineSDNode *Store =
1467           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1468 
1469       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1470         CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1471 
1472       ReplaceNode(Node, Store);
1473       return;
1474     }
1475     case Intrinsic::riscv_vsm:
1476     case Intrinsic::riscv_vse:
1477     case Intrinsic::riscv_vse_mask:
1478     case Intrinsic::riscv_vsse:
1479     case Intrinsic::riscv_vsse_mask: {
1480       bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
1481                       IntNo == Intrinsic::riscv_vsse_mask;
1482       bool IsStrided =
1483           IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
1484 
1485       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1486       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1487 
1488       unsigned CurOp = 2;
1489       SmallVector<SDValue, 8> Operands;
1490       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1491 
1492       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1493                                  Operands);
1494 
1495       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1496       const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
1497           IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
1498       MachineSDNode *Store =
1499           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1500       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1501         CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1502 
1503       ReplaceNode(Node, Store);
1504       return;
1505     }
1506     }
1507     break;
1508   }
1509   case ISD::BITCAST: {
1510     MVT SrcVT = Node->getOperand(0).getSimpleValueType();
1511     // Just drop bitcasts between vectors if both are fixed or both are
1512     // scalable.
1513     if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
1514         (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
1515       ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
1516       CurDAG->RemoveDeadNode(Node);
1517       return;
1518     }
1519     break;
1520   }
1521   case ISD::INSERT_SUBVECTOR: {
1522     SDValue V = Node->getOperand(0);
1523     SDValue SubV = Node->getOperand(1);
1524     SDLoc DL(SubV);
1525     auto Idx = Node->getConstantOperandVal(2);
1526     MVT SubVecVT = SubV.getSimpleValueType();
1527 
1528     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
1529     MVT SubVecContainerVT = SubVecVT;
1530     // Establish the correct scalable-vector types for any fixed-length type.
1531     if (SubVecVT.isFixedLengthVector())
1532       SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
1533     if (VT.isFixedLengthVector())
1534       VT = TLI.getContainerForFixedLengthVector(VT);
1535 
1536     const auto *TRI = Subtarget->getRegisterInfo();
1537     unsigned SubRegIdx;
1538     std::tie(SubRegIdx, Idx) =
1539         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1540             VT, SubVecContainerVT, Idx, TRI);
1541 
1542     // If the Idx hasn't been completely eliminated then this is a subvector
1543     // insert which doesn't naturally align to a vector register. These must
1544     // be handled using instructions to manipulate the vector registers.
1545     if (Idx != 0)
1546       break;
1547 
1548     RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
1549     bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
1550                            SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
1551                            SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
1552     (void)IsSubVecPartReg; // Silence unused variable warning without asserts.
1553     assert((!IsSubVecPartReg || V.isUndef()) &&
1554            "Expecting lowering to have created legal INSERT_SUBVECTORs when "
1555            "the subvector is smaller than a full-sized register");
1556 
1557     // If we haven't set a SubRegIdx, then we must be going between
1558     // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
1559     if (SubRegIdx == RISCV::NoSubRegister) {
1560       unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT);
1561       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
1562                  InRegClassID &&
1563              "Unexpected subvector extraction");
1564       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
1565       SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
1566                                                DL, VT, SubV, RC);
1567       ReplaceNode(Node, NewNode);
1568       return;
1569     }
1570 
1571     SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
1572     ReplaceNode(Node, Insert.getNode());
1573     return;
1574   }
1575   case ISD::EXTRACT_SUBVECTOR: {
1576     SDValue V = Node->getOperand(0);
1577     auto Idx = Node->getConstantOperandVal(1);
1578     MVT InVT = V.getSimpleValueType();
1579     SDLoc DL(V);
1580 
1581     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
1582     MVT SubVecContainerVT = VT;
1583     // Establish the correct scalable-vector types for any fixed-length type.
1584     if (VT.isFixedLengthVector())
1585       SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
1586     if (InVT.isFixedLengthVector())
1587       InVT = TLI.getContainerForFixedLengthVector(InVT);
1588 
1589     const auto *TRI = Subtarget->getRegisterInfo();
1590     unsigned SubRegIdx;
1591     std::tie(SubRegIdx, Idx) =
1592         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1593             InVT, SubVecContainerVT, Idx, TRI);
1594 
1595     // If the Idx hasn't been completely eliminated then this is a subvector
1596     // extract which doesn't naturally align to a vector register. These must
1597     // be handled using instructions to manipulate the vector registers.
1598     if (Idx != 0)
1599       break;
1600 
1601     // If we haven't set a SubRegIdx, then we must be going between
1602     // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
1603     if (SubRegIdx == RISCV::NoSubRegister) {
1604       unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
1605       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
1606                  InRegClassID &&
1607              "Unexpected subvector extraction");
1608       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
1609       SDNode *NewNode =
1610           CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
1611       ReplaceNode(Node, NewNode);
1612       return;
1613     }
1614 
1615     SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
1616     ReplaceNode(Node, Extract.getNode());
1617     return;
1618   }
1619   case ISD::SPLAT_VECTOR:
1620   case RISCVISD::VMV_S_X_VL:
1621   case RISCVISD::VFMV_S_F_VL:
1622   case RISCVISD::VMV_V_X_VL:
1623   case RISCVISD::VFMV_V_F_VL: {
1624     // Try to match splat of a scalar load to a strided load with stride of x0.
1625     bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
1626                         Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
1627     if (IsScalarMove && !Node->getOperand(0).isUndef())
1628       break;
1629     SDValue Src = IsScalarMove ? Node->getOperand(1) : Node->getOperand(0);
1630     auto *Ld = dyn_cast<LoadSDNode>(Src);
1631     if (!Ld)
1632       break;
1633     EVT MemVT = Ld->getMemoryVT();
1634     // The memory VT should be the same size as the element type.
1635     if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
1636       break;
1637     if (!IsProfitableToFold(Src, Node, Node) ||
1638         !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
1639       break;
1640 
1641     SDValue VL;
1642     if (Node->getOpcode() == ISD::SPLAT_VECTOR)
1643       VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
1644     else if (IsScalarMove) {
1645       // We could deal with more VL if we update the VSETVLI insert pass to
1646       // avoid introducing more VSETVLI.
1647       if (!isOneConstant(Node->getOperand(2)))
1648         break;
1649       selectVLOp(Node->getOperand(2), VL);
1650     } else
1651       selectVLOp(Node->getOperand(1), VL);
1652 
1653     unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1654     SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
1655 
1656     SDValue Operands[] = {Ld->getBasePtr(),
1657                           CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW,
1658                           Ld->getChain()};
1659 
1660     RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1661     const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
1662         /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false,
1663         Log2SEW, static_cast<unsigned>(LMUL));
1664     MachineSDNode *Load =
1665         CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1666 
1667     if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1668       CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1669 
1670     ReplaceNode(Node, Load);
1671     return;
1672   }
1673   }
1674 
1675   // Select the default instruction.
1676   SelectCode(Node);
1677 }
1678 
1679 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
1680     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
1681   switch (ConstraintID) {
1682   case InlineAsm::Constraint_m:
1683     // We just support simple memory operands that have a single address
1684     // operand and need no special handling.
1685     OutOps.push_back(Op);
1686     return false;
1687   case InlineAsm::Constraint_A:
1688     OutOps.push_back(Op);
1689     return false;
1690   default:
1691     break;
1692   }
1693 
1694   return true;
1695 }
1696 
1697 bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
1698   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
1699     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
1700     return true;
1701   }
1702   return false;
1703 }
1704 
1705 bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) {
1706   // If this is FrameIndex, select it directly. Otherwise just let it get
1707   // selected to a register independently.
1708   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr))
1709     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
1710   else
1711     Base = Addr;
1712   return true;
1713 }
1714 
1715 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
1716                                         SDValue &ShAmt) {
1717   // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift
1718   // amount. If there is an AND on the shift amount, we can bypass it if it
1719   // doesn't affect any of those bits.
1720   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
1721     const APInt &AndMask = N->getConstantOperandAPInt(1);
1722 
1723     // Since the max shift amount is a power of 2 we can subtract 1 to make a
1724     // mask that covers the bits needed to represent all shift amounts.
1725     assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
1726     APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
1727 
1728     if (ShMask.isSubsetOf(AndMask)) {
1729       ShAmt = N.getOperand(0);
1730       return true;
1731     }
1732 
1733     // SimplifyDemandedBits may have optimized the mask so try restoring any
1734     // bits that are known zero.
1735     KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0));
1736     if (ShMask.isSubsetOf(AndMask | Known.Zero)) {
1737       ShAmt = N.getOperand(0);
1738       return true;
1739     }
1740   } else if (N.getOpcode() == ISD::SUB &&
1741              isa<ConstantSDNode>(N.getOperand(0))) {
1742     uint64_t Imm = N.getConstantOperandVal(0);
1743     // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
1744     // generate a NEG instead of a SUB of a constant.
1745     if (Imm != 0 && Imm % ShiftWidth == 0) {
1746       SDLoc DL(N);
1747       EVT VT = N.getValueType();
1748       SDValue Zero =
1749           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1750       unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
1751       MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
1752                                                   N.getOperand(1));
1753       ShAmt = SDValue(Neg, 0);
1754       return true;
1755     }
1756   }
1757 
1758   ShAmt = N;
1759   return true;
1760 }
1761 
1762 bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) {
1763   if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1764       cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
1765     Val = N.getOperand(0);
1766     return true;
1767   }
1768   MVT VT = N.getSimpleValueType();
1769   if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) {
1770     Val = N;
1771     return true;
1772   }
1773 
1774   return false;
1775 }
1776 
1777 bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
1778   if (N.getOpcode() == ISD::AND) {
1779     auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
1780     if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) {
1781       Val = N.getOperand(0);
1782       return true;
1783     }
1784   }
1785   MVT VT = N.getSimpleValueType();
1786   APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32);
1787   if (CurDAG->MaskedValueIsZero(N, Mask)) {
1788     Val = N;
1789     return true;
1790   }
1791 
1792   return false;
1793 }
1794 
1795 // Return true if all users of this SDNode* only consume the lower \p Bits.
1796 // This can be used to form W instructions for add/sub/mul/shl even when the
1797 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
1798 // SimplifyDemandedBits has made it so some users see a sext_inreg and some
1799 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
1800 // the add/sub/mul/shl to become non-W instructions. By checking the users we
1801 // may be able to use a W instruction and CSE with the other instruction if
1802 // this has happened. We could try to detect that the CSE opportunity exists
1803 // before doing this, but that would be more complicated.
1804 // TODO: Does this need to look through AND/OR/XOR to their users to find more
1805 // opportunities.
1806 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
1807   assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
1808           Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
1809           Node->getOpcode() == ISD::SRL ||
1810           Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
1811           isa<ConstantSDNode>(Node)) &&
1812          "Unexpected opcode");
1813 
1814   for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
1815     SDNode *User = *UI;
1816     // Users of this node should have already been instruction selected
1817     if (!User->isMachineOpcode())
1818       return false;
1819 
1820     // TODO: Add more opcodes?
1821     switch (User->getMachineOpcode()) {
1822     default:
1823       return false;
1824     case RISCV::ADDW:
1825     case RISCV::ADDIW:
1826     case RISCV::SUBW:
1827     case RISCV::MULW:
1828     case RISCV::SLLW:
1829     case RISCV::SLLIW:
1830     case RISCV::SRAW:
1831     case RISCV::SRAIW:
1832     case RISCV::SRLW:
1833     case RISCV::SRLIW:
1834     case RISCV::DIVW:
1835     case RISCV::DIVUW:
1836     case RISCV::REMW:
1837     case RISCV::REMUW:
1838     case RISCV::ROLW:
1839     case RISCV::RORW:
1840     case RISCV::RORIW:
1841     case RISCV::CLZW:
1842     case RISCV::CTZW:
1843     case RISCV::CPOPW:
1844     case RISCV::SLLI_UW:
1845     case RISCV::FCVT_H_W:
1846     case RISCV::FCVT_H_WU:
1847     case RISCV::FCVT_S_W:
1848     case RISCV::FCVT_S_WU:
1849     case RISCV::FCVT_D_W:
1850     case RISCV::FCVT_D_WU:
1851       if (Bits < 32)
1852         return false;
1853       break;
1854     case RISCV::SLLI:
1855       // SLLI only uses the lower (XLen - ShAmt) bits.
1856       if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
1857         return false;
1858       break;
1859     case RISCV::ANDI:
1860       if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
1861         return false;
1862       break;
1863     case RISCV::SEXT_B:
1864       if (Bits < 8)
1865         return false;
1866       break;
1867     case RISCV::SEXT_H:
1868     case RISCV::ZEXT_H_RV32:
1869     case RISCV::ZEXT_H_RV64:
1870       if (Bits < 16)
1871         return false;
1872       break;
1873     case RISCV::ADD_UW:
1874     case RISCV::SH1ADD_UW:
1875     case RISCV::SH2ADD_UW:
1876     case RISCV::SH3ADD_UW:
1877       // The first operand to add.uw/shXadd.uw is implicitly zero extended from
1878       // 32 bits.
1879       if (UI.getOperandNo() != 0 || Bits < 32)
1880         return false;
1881       break;
1882     case RISCV::SB:
1883       if (UI.getOperandNo() != 0 || Bits < 8)
1884         return false;
1885       break;
1886     case RISCV::SH:
1887       if (UI.getOperandNo() != 0 || Bits < 16)
1888         return false;
1889       break;
1890     case RISCV::SW:
1891       if (UI.getOperandNo() != 0 || Bits < 32)
1892         return false;
1893       break;
1894     }
1895   }
1896 
1897   return true;
1898 }
1899 
1900 // Select VL as a 5 bit immediate or a value that will become a register. This
1901 // allows us to choose betwen VSETIVLI or VSETVLI later.
1902 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
1903   auto *C = dyn_cast<ConstantSDNode>(N);
1904   if (C && isUInt<5>(C->getZExtValue())) {
1905     VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
1906                                    N->getValueType(0));
1907   } else if (C && C->isAllOnesValue()) {
1908     // Treat all ones as VLMax.
1909     VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
1910                                    N->getValueType(0));
1911   } else if (isa<RegisterSDNode>(N) &&
1912              cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
1913     // All our VL operands use an operand that allows GPRNoX0 or an immediate
1914     // as the register class. Convert X0 to a special immediate to pass the
1915     // MachineVerifier. This is recognized specially by the vsetvli insertion
1916     // pass.
1917     VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
1918                                    N->getValueType(0));
1919   } else {
1920     VL = N;
1921   }
1922 
1923   return true;
1924 }
1925 
1926 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
1927   if (N.getOpcode() != RISCVISD::VMV_V_X_VL)
1928     return false;
1929   SplatVal = N.getOperand(0);
1930   return true;
1931 }
1932 
1933 using ValidateFn = bool (*)(int64_t);
1934 
1935 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
1936                                    SelectionDAG &DAG,
1937                                    const RISCVSubtarget &Subtarget,
1938                                    ValidateFn ValidateImm) {
1939   if (N.getOpcode() != RISCVISD::VMV_V_X_VL ||
1940       !isa<ConstantSDNode>(N.getOperand(0)))
1941     return false;
1942 
1943   int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
1944 
1945   // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
1946   // type is wider than the resulting vector element type: an implicit
1947   // truncation first takes place. Therefore, perform a manual
1948   // truncation/sign-extension in order to ignore any truncated bits and catch
1949   // any zero-extended immediate.
1950   // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
1951   // sign-extending to (XLenVT -1).
1952   MVT XLenVT = Subtarget.getXLenVT();
1953   assert(XLenVT == N.getOperand(0).getSimpleValueType() &&
1954          "Unexpected splat operand type");
1955   MVT EltVT = N.getSimpleValueType().getVectorElementType();
1956   if (EltVT.bitsLT(XLenVT))
1957     SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());
1958 
1959   if (!ValidateImm(SplatImm))
1960     return false;
1961 
1962   SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
1963   return true;
1964 }
1965 
1966 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
1967   return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
1968                                 [](int64_t Imm) { return isInt<5>(Imm); });
1969 }
1970 
1971 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
1972   return selectVSplatSimmHelper(
1973       N, SplatVal, *CurDAG, *Subtarget,
1974       [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
1975 }
1976 
1977 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
1978                                                       SDValue &SplatVal) {
1979   return selectVSplatSimmHelper(
1980       N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
1981         return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
1982       });
1983 }
1984 
1985 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
1986   if (N.getOpcode() != RISCVISD::VMV_V_X_VL ||
1987       !isa<ConstantSDNode>(N.getOperand(0)))
1988     return false;
1989 
1990   int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
1991 
1992   if (!isUInt<5>(SplatImm))
1993     return false;
1994 
1995   SplatVal =
1996       CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());
1997 
1998   return true;
1999 }
2000 
2001 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
2002                                        SDValue &Imm) {
2003   if (auto *C = dyn_cast<ConstantSDNode>(N)) {
2004     int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
2005 
2006     if (!isInt<5>(ImmVal))
2007       return false;
2008 
2009     Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
2010     return true;
2011   }
2012 
2013   return false;
2014 }
2015 
2016 // Merge an ADDI into the offset of a load/store instruction where possible.
2017 // (load (addi base, off1), off2) -> (load base, off1+off2)
2018 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
2019 // This is possible when off1+off2 fits a 12-bit immediate.
2020 bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
2021   int OffsetOpIdx;
2022   int BaseOpIdx;
2023 
2024   // Only attempt this optimisation for I-type loads and S-type stores.
2025   switch (N->getMachineOpcode()) {
2026   default:
2027     return false;
2028   case RISCV::LB:
2029   case RISCV::LH:
2030   case RISCV::LW:
2031   case RISCV::LBU:
2032   case RISCV::LHU:
2033   case RISCV::LWU:
2034   case RISCV::LD:
2035   case RISCV::FLH:
2036   case RISCV::FLW:
2037   case RISCV::FLD:
2038     BaseOpIdx = 0;
2039     OffsetOpIdx = 1;
2040     break;
2041   case RISCV::SB:
2042   case RISCV::SH:
2043   case RISCV::SW:
2044   case RISCV::SD:
2045   case RISCV::FSH:
2046   case RISCV::FSW:
2047   case RISCV::FSD:
2048     BaseOpIdx = 1;
2049     OffsetOpIdx = 2;
2050     break;
2051   }
2052 
2053   if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
2054     return false;
2055 
2056   SDValue Base = N->getOperand(BaseOpIdx);
2057 
2058   // If the base is an ADDI, we can merge it in to the load/store.
2059   if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
2060     return false;
2061 
2062   SDValue ImmOperand = Base.getOperand(1);
2063   uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
2064 
2065   if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
2066     int64_t Offset1 = Const->getSExtValue();
2067     int64_t CombinedOffset = Offset1 + Offset2;
2068     if (!isInt<12>(CombinedOffset))
2069       return false;
2070     ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
2071                                            ImmOperand.getValueType());
2072   } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
2073     // If the off1 in (addi base, off1) is a global variable's address (its
2074     // low part, really), then we can rely on the alignment of that variable
2075     // to provide a margin of safety before off1 can overflow the 12 bits.
2076     // Check if off2 falls within that margin; if so off1+off2 can't overflow.
2077     const DataLayout &DL = CurDAG->getDataLayout();
2078     Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
2079     if (Offset2 != 0 && Alignment <= Offset2)
2080       return false;
2081     int64_t Offset1 = GA->getOffset();
2082     int64_t CombinedOffset = Offset1 + Offset2;
2083     ImmOperand = CurDAG->getTargetGlobalAddress(
2084         GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
2085         CombinedOffset, GA->getTargetFlags());
2086   } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
2087     // Ditto.
2088     Align Alignment = CP->getAlign();
2089     if (Offset2 != 0 && Alignment <= Offset2)
2090       return false;
2091     int64_t Offset1 = CP->getOffset();
2092     int64_t CombinedOffset = Offset1 + Offset2;
2093     ImmOperand = CurDAG->getTargetConstantPool(
2094         CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
2095         CombinedOffset, CP->getTargetFlags());
2096   } else {
2097     return false;
2098   }
2099 
2100   LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
2101   LLVM_DEBUG(Base->dump(CurDAG));
2102   LLVM_DEBUG(dbgs() << "\nN: ");
2103   LLVM_DEBUG(N->dump(CurDAG));
2104   LLVM_DEBUG(dbgs() << "\n");
2105 
2106   // Modify the offset operand of the load/store.
2107   if (BaseOpIdx == 0) // Load
2108     CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
2109                                N->getOperand(2));
2110   else // Store
2111     CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
2112                                ImmOperand, N->getOperand(3));
2113 
2114   return true;
2115 }
2116 
2117 // Try to remove sext.w if the input is a W instruction or can be made into
2118 // a W instruction cheaply.
2119 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
2120   // Look for the sext.w pattern, addiw rd, rs1, 0.
2121   if (N->getMachineOpcode() != RISCV::ADDIW ||
2122       !isNullConstant(N->getOperand(1)))
2123     return false;
2124 
2125   SDValue N0 = N->getOperand(0);
2126   if (!N0.isMachineOpcode())
2127     return false;
2128 
2129   switch (N0.getMachineOpcode()) {
2130   default:
2131     break;
2132   case RISCV::ADD:
2133   case RISCV::ADDI:
2134   case RISCV::SUB:
2135   case RISCV::MUL:
2136   case RISCV::SLLI: {
2137     // Convert sext.w+add/sub/mul to their W instructions. This will create
2138     // a new independent instruction. This improves latency.
2139     unsigned Opc;
2140     switch (N0.getMachineOpcode()) {
2141     default:
2142       llvm_unreachable("Unexpected opcode!");
2143     case RISCV::ADD:  Opc = RISCV::ADDW;  break;
2144     case RISCV::ADDI: Opc = RISCV::ADDIW; break;
2145     case RISCV::SUB:  Opc = RISCV::SUBW;  break;
2146     case RISCV::MUL:  Opc = RISCV::MULW;  break;
2147     case RISCV::SLLI: Opc = RISCV::SLLIW; break;
2148     }
2149 
2150     SDValue N00 = N0.getOperand(0);
2151     SDValue N01 = N0.getOperand(1);
2152 
2153     // Shift amount needs to be uimm5.
2154     if (N0.getMachineOpcode() == RISCV::SLLI &&
2155         !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
2156       break;
2157 
2158     SDNode *Result =
2159         CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
2160                                N00, N01);
2161     ReplaceUses(N, Result);
2162     return true;
2163   }
2164   case RISCV::ADDW:
2165   case RISCV::ADDIW:
2166   case RISCV::SUBW:
2167   case RISCV::MULW:
2168   case RISCV::SLLIW:
2169     // Result is already sign extended just remove the sext.w.
2170     // NOTE: We only handle the nodes that are selected with hasAllWUsers.
2171     ReplaceUses(N, N0.getNode());
2172     return true;
2173   }
2174 
2175   return false;
2176 }
2177 
2178 // Optimize masked RVV pseudo instructions with a known all-ones mask to their
2179 // corresponding "unmasked" pseudo versions. The mask we're interested in will
2180 // take the form of a V0 physical register operand, with a glued
2181 // register-setting instruction.
2182 bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
2183   const RISCV::RISCVMaskedPseudoInfo *I =
2184       RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
2185   if (!I)
2186     return false;
2187 
2188   unsigned MaskOpIdx = I->MaskOpIdx;
2189 
2190   // Check that we're using V0 as a mask register.
2191   if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
2192       cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
2193     return false;
2194 
2195   // The glued user defines V0.
2196   const auto *Glued = N->getGluedNode();
2197 
2198   if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
2199     return false;
2200 
2201   // Check that we're defining V0 as a mask register.
2202   if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
2203       cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
2204     return false;
2205 
2206   // Check the instruction defining V0; it needs to be a VMSET pseudo.
2207   SDValue MaskSetter = Glued->getOperand(2);
2208 
2209   const auto IsVMSet = [](unsigned Opc) {
2210     return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
2211            Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
2212            Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
2213            Opc == RISCV::PseudoVMSET_M_B8;
2214   };
2215 
2216   // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
2217   // undefined behaviour if it's the wrong bitwidth, so we could choose to
2218   // assume that it's all-ones? Same applies to its VL.
2219   if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode()))
2220     return false;
2221 
2222   // Retrieve the tail policy operand index, if any.
2223   Optional<unsigned> TailPolicyOpIdx;
2224   const RISCVInstrInfo *TII = static_cast<const RISCVInstrInfo *>(
2225       CurDAG->getSubtarget().getInstrInfo());
2226 
2227   const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
2228 
2229   if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
2230     // The last operand of the pseudo is the policy op, but we're expecting a
2231     // Glue operand last. We may also have a chain.
2232     TailPolicyOpIdx = N->getNumOperands() - 1;
2233     if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue)
2234       (*TailPolicyOpIdx)--;
2235     if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other)
2236       (*TailPolicyOpIdx)--;
2237 
2238     // If the policy isn't TAIL_AGNOSTIC we can't perform this optimization.
2239     if (N->getConstantOperandVal(*TailPolicyOpIdx) != RISCVII::TAIL_AGNOSTIC)
2240       return false;
2241   }
2242 
2243   const MCInstrDesc &UnmaskedMCID = TII->get(I->UnmaskedPseudo);
2244 
2245   // Check that we're dropping the merge operand, the mask operand, and any
2246   // policy operand when we transform to this unmasked pseudo.
2247   assert(!RISCVII::hasMergeOp(UnmaskedMCID.TSFlags) &&
2248          RISCVII::hasDummyMaskOp(UnmaskedMCID.TSFlags) &&
2249          !RISCVII::hasVecPolicyOp(UnmaskedMCID.TSFlags) &&
2250          "Unexpected pseudo to transform to");
2251   (void)UnmaskedMCID;
2252 
2253   SmallVector<SDValue, 8> Ops;
2254   // Skip the merge operand at index 0.
2255   for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
2256     // Skip the mask, the policy, and the Glue.
2257     SDValue Op = N->getOperand(I);
2258     if (I == MaskOpIdx || I == TailPolicyOpIdx ||
2259         Op.getValueType() == MVT::Glue)
2260       continue;
2261     Ops.push_back(Op);
2262   }
2263 
2264   // Transitively apply any node glued to our new node.
2265   if (auto *TGlued = Glued->getGluedNode())
2266     Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
2267 
2268   SDNode *Result =
2269       CurDAG->getMachineNode(I->UnmaskedPseudo, SDLoc(N), N->getVTList(), Ops);
2270   ReplaceUses(N, Result);
2271 
2272   return true;
2273 }
2274 
2275 // This pass converts a legalized DAG into a RISCV-specific DAG, ready
2276 // for instruction scheduling.
2277 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
2278   return new RISCVDAGToDAGISel(TM);
2279 }
2280