1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the RISCV target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "RISCVISelDAGToDAG.h"
14 #include "MCTargetDesc/RISCVMCTargetDesc.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCVISelLowering.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/IR/IntrinsicsRISCV.h"
20 #include "llvm/Support/Alignment.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/KnownBits.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "riscv-isel"
29 
30 namespace llvm {
31 namespace RISCV {
32 #define GET_RISCVVSSEGTable_IMPL
33 #define GET_RISCVVLSEGTable_IMPL
34 #define GET_RISCVVLXSEGTable_IMPL
35 #define GET_RISCVVSXSEGTable_IMPL
36 #define GET_RISCVVLETable_IMPL
37 #define GET_RISCVVSETable_IMPL
38 #define GET_RISCVVLXTable_IMPL
39 #define GET_RISCVVSXTable_IMPL
40 #define GET_RISCVMaskedPseudosTable_IMPL
41 #include "RISCVGenSearchableTables.inc"
42 } // namespace RISCV
43 } // namespace llvm
44 
45 void RISCVDAGToDAGISel::PreprocessISelDAG() {
46   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
47                                        E = CurDAG->allnodes_end();
48        I != E;) {
49     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
50 
51     // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
52     // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
53     if (N->getOpcode() == ISD::SPLAT_VECTOR) {
54       MVT VT = N->getSimpleValueType(0);
55       unsigned Opc =
56           VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
57       SDLoc DL(N);
58       SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
59       SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
60                                        N->getOperand(0), VL);
61 
62       --I;
63       CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
64       ++I;
65       CurDAG->DeleteNode(N);
66       continue;
67     }
68 
69     // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
70     // load. Done after lowering and combining so that we have a chance to
71     // optimize this to VMV_V_X_VL when the upper bits aren't needed.
72     if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL)
73       continue;
74 
75     assert(N->getNumOperands() == 4 && "Unexpected number of operands");
76     MVT VT = N->getSimpleValueType(0);
77     SDValue Passthru = N->getOperand(0);
78     SDValue Lo = N->getOperand(1);
79     SDValue Hi = N->getOperand(2);
80     SDValue VL = N->getOperand(3);
81     assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
82            Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
83            "Unexpected VTs!");
84     MachineFunction &MF = CurDAG->getMachineFunction();
85     RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
86     SDLoc DL(N);
87 
88     // We use the same frame index we use for moving two i32s into 64-bit FPR.
89     // This is an analogous operation.
90     int FI = FuncInfo->getMoveF64FrameIndex(MF);
91     MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
92     const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
93     SDValue StackSlot =
94         CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));
95 
96     SDValue Chain = CurDAG->getEntryNode();
97     Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
98 
99     SDValue OffsetSlot =
100         CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
101     Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
102                           Align(8));
103 
104     Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
105 
106     SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
107     SDValue IntID =
108         CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
109     SDValue Ops[] = {Chain,
110                      IntID,
111                      Passthru,
112                      StackSlot,
113                      CurDAG->getRegister(RISCV::X0, MVT::i64),
114                      VL};
115 
116     SDValue Result = CurDAG->getMemIntrinsicNode(
117         ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8),
118         MachineMemOperand::MOLoad);
119 
120     // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the
121     // vlse we created.  This will cause general havok on the dag because
122     // anything below the conversion could be folded into other existing nodes.
123     // To avoid invalidating 'I', back it up to the convert node.
124     --I;
125     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
126 
127     // Now that we did that, the node is dead.  Increment the iterator to the
128     // next node to process, then delete N.
129     ++I;
130     CurDAG->DeleteNode(N);
131   }
132 }
133 
134 void RISCVDAGToDAGISel::PostprocessISelDAG() {
135   HandleSDNode Dummy(CurDAG->getRoot());
136   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
137 
138   bool MadeChange = false;
139   while (Position != CurDAG->allnodes_begin()) {
140     SDNode *N = &*--Position;
141     // Skip dead nodes and any non-machine opcodes.
142     if (N->use_empty() || !N->isMachineOpcode())
143       continue;
144 
145     MadeChange |= doPeepholeSExtW(N);
146     MadeChange |= doPeepholeLoadStoreADDI(N);
147     MadeChange |= doPeepholeMaskedRVV(N);
148   }
149 
150   CurDAG->setRoot(Dummy.getValue());
151 
152   if (MadeChange)
153     CurDAG->RemoveDeadNodes();
154 }
155 
156 // Returns true if N is a MachineSDNode that has a reg and simm12 memory
157 // operand. The indices of the base pointer and offset are returned in BaseOpIdx
158 // and OffsetOpIdx.
159 static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx,
160                          unsigned &OffsetOpIdx) {
161   switch (N->getMachineOpcode()) {
162   case RISCV::LB:
163   case RISCV::LH:
164   case RISCV::LW:
165   case RISCV::LBU:
166   case RISCV::LHU:
167   case RISCV::LWU:
168   case RISCV::LD:
169   case RISCV::FLH:
170   case RISCV::FLW:
171   case RISCV::FLD:
172     BaseOpIdx = 0;
173     OffsetOpIdx = 1;
174     return true;
175   case RISCV::SB:
176   case RISCV::SH:
177   case RISCV::SW:
178   case RISCV::SD:
179   case RISCV::FSH:
180   case RISCV::FSW:
181   case RISCV::FSD:
182     BaseOpIdx = 1;
183     OffsetOpIdx = 2;
184     return true;
185   }
186 
187   return false;
188 }
189 
190 static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
191                             RISCVMatInt::InstSeq &Seq) {
192   SDNode *Result = nullptr;
193   SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
194   for (RISCVMatInt::Inst &Inst : Seq) {
195     SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT);
196     switch (Inst.getOpndKind()) {
197     case RISCVMatInt::Imm:
198       Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm);
199       break;
200     case RISCVMatInt::RegX0:
201       Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg,
202                                       CurDAG->getRegister(RISCV::X0, VT));
203       break;
204     case RISCVMatInt::RegReg:
205       Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SrcReg);
206       break;
207     case RISCVMatInt::RegImm:
208       Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm);
209       break;
210     }
211 
212     // Only the first instruction has X0 as its source.
213     SrcReg = SDValue(Result, 0);
214   }
215 
216   return Result;
217 }
218 
219 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
220                          int64_t Imm, const RISCVSubtarget &Subtarget) {
221   RISCVMatInt::InstSeq Seq =
222       RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
223 
224   return selectImmSeq(CurDAG, DL, VT, Seq);
225 }
226 
227 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
228                            unsigned NF, RISCVII::VLMUL LMUL) {
229   static const unsigned M1TupleRegClassIDs[] = {
230       RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
231       RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
232       RISCV::VRN8M1RegClassID};
233   static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
234                                                 RISCV::VRN3M2RegClassID,
235                                                 RISCV::VRN4M2RegClassID};
236 
237   assert(Regs.size() >= 2 && Regs.size() <= 8);
238 
239   unsigned RegClassID;
240   unsigned SubReg0;
241   switch (LMUL) {
242   default:
243     llvm_unreachable("Invalid LMUL.");
244   case RISCVII::VLMUL::LMUL_F8:
245   case RISCVII::VLMUL::LMUL_F4:
246   case RISCVII::VLMUL::LMUL_F2:
247   case RISCVII::VLMUL::LMUL_1:
248     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
249                   "Unexpected subreg numbering");
250     SubReg0 = RISCV::sub_vrm1_0;
251     RegClassID = M1TupleRegClassIDs[NF - 2];
252     break;
253   case RISCVII::VLMUL::LMUL_2:
254     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
255                   "Unexpected subreg numbering");
256     SubReg0 = RISCV::sub_vrm2_0;
257     RegClassID = M2TupleRegClassIDs[NF - 2];
258     break;
259   case RISCVII::VLMUL::LMUL_4:
260     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
261                   "Unexpected subreg numbering");
262     SubReg0 = RISCV::sub_vrm4_0;
263     RegClassID = RISCV::VRN2M4RegClassID;
264     break;
265   }
266 
267   SDLoc DL(Regs[0]);
268   SmallVector<SDValue, 8> Ops;
269 
270   Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
271 
272   for (unsigned I = 0; I < Regs.size(); ++I) {
273     Ops.push_back(Regs[I]);
274     Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
275   }
276   SDNode *N =
277       CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
278   return SDValue(N, 0);
279 }
280 
281 void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
282     SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
283     bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
284     bool IsLoad, MVT *IndexVT) {
285   SDValue Chain = Node->getOperand(0);
286   SDValue Glue;
287 
288   SDValue Base;
289   SelectBaseAddr(Node->getOperand(CurOp++), Base);
290   Operands.push_back(Base); // Base pointer.
291 
292   if (IsStridedOrIndexed) {
293     Operands.push_back(Node->getOperand(CurOp++)); // Index.
294     if (IndexVT)
295       *IndexVT = Operands.back()->getSimpleValueType(0);
296   }
297 
298   if (IsMasked) {
299     // Mask needs to be copied to V0.
300     SDValue Mask = Node->getOperand(CurOp++);
301     Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
302     Glue = Chain.getValue(1);
303     Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
304   }
305   SDValue VL;
306   selectVLOp(Node->getOperand(CurOp++), VL);
307   Operands.push_back(VL);
308 
309   MVT XLenVT = Subtarget->getXLenVT();
310   SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
311   Operands.push_back(SEWOp);
312 
313   // Masked load has the tail policy argument.
314   if (IsMasked && IsLoad) {
315     // Policy must be a constant.
316     uint64_t Policy = Node->getConstantOperandVal(CurOp++);
317     SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
318     Operands.push_back(PolicyOp);
319   }
320 
321   Operands.push_back(Chain); // Chain.
322   if (Glue)
323     Operands.push_back(Glue);
324 }
325 
326 static bool isAllUndef(ArrayRef<SDValue> Values) {
327   return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); });
328 }
329 
330 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
331                                     bool IsStrided) {
332   SDLoc DL(Node);
333   unsigned NF = Node->getNumValues() - 1;
334   MVT VT = Node->getSimpleValueType(0);
335   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
336   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
337 
338   unsigned CurOp = 2;
339   SmallVector<SDValue, 8> Operands;
340 
341   SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
342                                Node->op_begin() + CurOp + NF);
343   bool IsTU = IsMasked || !isAllUndef(Regs);
344   if (IsTU) {
345     SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
346     Operands.push_back(Merge);
347   }
348   CurOp += NF;
349 
350   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
351                              Operands, /*IsLoad=*/true);
352 
353   const RISCV::VLSEGPseudo *P =
354       RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
355                             static_cast<unsigned>(LMUL));
356   MachineSDNode *Load =
357       CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
358 
359   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
360     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
361 
362   SDValue SuperReg = SDValue(Load, 0);
363   for (unsigned I = 0; I < NF; ++I) {
364     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
365     ReplaceUses(SDValue(Node, I),
366                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
367   }
368 
369   ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
370   CurDAG->RemoveDeadNode(Node);
371 }
372 
373 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
374   SDLoc DL(Node);
375   unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
376   MVT VT = Node->getSimpleValueType(0);
377   MVT XLenVT = Subtarget->getXLenVT();
378   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
379   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
380 
381   unsigned CurOp = 2;
382   SmallVector<SDValue, 7> Operands;
383 
384   SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
385                                Node->op_begin() + CurOp + NF);
386   bool IsTU = IsMasked || !isAllUndef(Regs);
387   if (IsTU) {
388     SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
389     Operands.push_back(MaskedOff);
390   }
391   CurOp += NF;
392 
393   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
394                              /*IsStridedOrIndexed*/ false, Operands,
395                              /*IsLoad=*/true);
396 
397   const RISCV::VLSEGPseudo *P =
398       RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
399                             Log2SEW, static_cast<unsigned>(LMUL));
400   MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
401                                                XLenVT, MVT::Other, Operands);
402 
403   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
404     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
405 
406   SDValue SuperReg = SDValue(Load, 0);
407   for (unsigned I = 0; I < NF; ++I) {
408     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
409     ReplaceUses(SDValue(Node, I),
410                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
411   }
412 
413   ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));     // VL
414   ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
415   CurDAG->RemoveDeadNode(Node);
416 }
417 
418 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
419                                      bool IsOrdered) {
420   SDLoc DL(Node);
421   unsigned NF = Node->getNumValues() - 1;
422   MVT VT = Node->getSimpleValueType(0);
423   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
424   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
425 
426   unsigned CurOp = 2;
427   SmallVector<SDValue, 8> Operands;
428 
429   SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
430                                Node->op_begin() + CurOp + NF);
431   bool IsTU = IsMasked || !isAllUndef(Regs);
432   if (IsTU) {
433     SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
434     Operands.push_back(MaskedOff);
435   }
436   CurOp += NF;
437 
438   MVT IndexVT;
439   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
440                              /*IsStridedOrIndexed*/ true, Operands,
441                              /*IsLoad=*/true, &IndexVT);
442 
443   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
444          "Element count mismatch");
445 
446   RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
447   unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
448   if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449     report_fatal_error("The V extension does not support EEW=64 for index "
450                        "values when XLEN=32");
451   }
452   const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
453       NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
454       static_cast<unsigned>(IndexLMUL));
455   MachineSDNode *Load =
456       CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
457 
458   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
459     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
460 
461   SDValue SuperReg = SDValue(Load, 0);
462   for (unsigned I = 0; I < NF; ++I) {
463     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
464     ReplaceUses(SDValue(Node, I),
465                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
466   }
467 
468   ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
469   CurDAG->RemoveDeadNode(Node);
470 }
471 
472 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
473                                     bool IsStrided) {
474   SDLoc DL(Node);
475   unsigned NF = Node->getNumOperands() - 4;
476   if (IsStrided)
477     NF--;
478   if (IsMasked)
479     NF--;
480   MVT VT = Node->getOperand(2)->getSimpleValueType(0);
481   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
482   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
483   SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
484   SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
485 
486   SmallVector<SDValue, 8> Operands;
487   Operands.push_back(StoreVal);
488   unsigned CurOp = 2 + NF;
489 
490   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
491                              Operands);
492 
493   const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
494       NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
495   MachineSDNode *Store =
496       CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
497 
498   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
499     CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
500 
501   ReplaceNode(Node, Store);
502 }
503 
504 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
505                                      bool IsOrdered) {
506   SDLoc DL(Node);
507   unsigned NF = Node->getNumOperands() - 5;
508   if (IsMasked)
509     --NF;
510   MVT VT = Node->getOperand(2)->getSimpleValueType(0);
511   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
512   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
513   SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
514   SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
515 
516   SmallVector<SDValue, 8> Operands;
517   Operands.push_back(StoreVal);
518   unsigned CurOp = 2 + NF;
519 
520   MVT IndexVT;
521   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
522                              /*IsStridedOrIndexed*/ true, Operands,
523                              /*IsLoad=*/false, &IndexVT);
524 
525   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
526          "Element count mismatch");
527 
528   RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
529   unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
530   if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
531     report_fatal_error("The V extension does not support EEW=64 for index "
532                        "values when XLEN=32");
533   }
534   const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
535       NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
536       static_cast<unsigned>(IndexLMUL));
537   MachineSDNode *Store =
538       CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
539 
540   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
541     CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
542 
543   ReplaceNode(Node, Store);
544 }
545 
546 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
547   if (!Subtarget->hasVInstructions())
548     return;
549 
550   assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
551           Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) &&
552          "Unexpected opcode");
553 
554   SDLoc DL(Node);
555   MVT XLenVT = Subtarget->getXLenVT();
556 
557   bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
558   unsigned IntNoOffset = HasChain ? 1 : 0;
559   unsigned IntNo = Node->getConstantOperandVal(IntNoOffset);
560 
561   assert((IntNo == Intrinsic::riscv_vsetvli ||
562           IntNo == Intrinsic::riscv_vsetvlimax ||
563           IntNo == Intrinsic::riscv_vsetvli_opt ||
564           IntNo == Intrinsic::riscv_vsetvlimax_opt) &&
565          "Unexpected vsetvli intrinsic");
566 
567   bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax ||
568                IntNo == Intrinsic::riscv_vsetvlimax_opt;
569   unsigned Offset = IntNoOffset + (VLMax ? 1 : 2);
570 
571   assert(Node->getNumOperands() == Offset + 2 &&
572          "Unexpected number of operands");
573 
574   unsigned SEW =
575       RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
576   RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
577       Node->getConstantOperandVal(Offset + 1) & 0x7);
578 
579   unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
580                                             /*MaskAgnostic*/ false);
581   SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
582 
583   SmallVector<EVT, 2> VTs = {XLenVT};
584   if (HasChain)
585     VTs.push_back(MVT::Other);
586 
587   SDValue VLOperand;
588   unsigned Opcode = RISCV::PseudoVSETVLI;
589   if (VLMax) {
590     VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
591     Opcode = RISCV::PseudoVSETVLIX0;
592   } else {
593     VLOperand = Node->getOperand(IntNoOffset + 1);
594 
595     if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
596       uint64_t AVL = C->getZExtValue();
597       if (isUInt<5>(AVL)) {
598         SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
599         SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
600         if (HasChain)
601           Ops.push_back(Node->getOperand(0));
602         ReplaceNode(
603             Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
604         return;
605       }
606     }
607   }
608 
609   SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
610   if (HasChain)
611     Ops.push_back(Node->getOperand(0));
612 
613   ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
614 }
615 
616 void RISCVDAGToDAGISel::Select(SDNode *Node) {
617   // If we have a custom node, we have already selected.
618   if (Node->isMachineOpcode()) {
619     LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
620     Node->setNodeId(-1);
621     return;
622   }
623 
624   // Instruction Selection not handled by the auto-generated tablegen selection
625   // should be handled here.
626   unsigned Opcode = Node->getOpcode();
627   MVT XLenVT = Subtarget->getXLenVT();
628   SDLoc DL(Node);
629   MVT VT = Node->getSimpleValueType(0);
630 
631   switch (Opcode) {
632   case ISD::Constant: {
633     auto *ConstNode = cast<ConstantSDNode>(Node);
634     if (VT == XLenVT && ConstNode->isZero()) {
635       SDValue New =
636           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT);
637       ReplaceNode(Node, New.getNode());
638       return;
639     }
640     int64_t Imm = ConstNode->getSExtValue();
641     // If the upper XLen-16 bits are not used, try to convert this to a simm12
642     // by sign extending bit 15.
643     if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
644         hasAllHUsers(Node))
645       Imm = SignExtend64<16>(Imm);
646     // If the upper 32-bits are not used try to convert this into a simm32 by
647     // sign extending bit 32.
648     if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
649       Imm = SignExtend64<32>(Imm);
650 
651     ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
652     return;
653   }
654   case ISD::ADD: {
655     // Try to select ADD + immediate used as memory addresses to
656     // (ADDI (ADD X, Imm-Lo12), Lo12) if it will allow the ADDI to be removed by
657     // doPeepholeLoadStoreADDI.
658 
659     // LHS should be an immediate.
660     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
661     if (!N1C)
662       break;
663 
664     int64_t Offset = N1C->getSExtValue();
665     int64_t Lo12 = SignExtend64<12>(Offset);
666 
667     // Don't do this if the lower 12 bits are 0 or we could use ADDI directly.
668     if (Lo12 == 0 || isInt<12>(Offset))
669       break;
670 
671     // Don't do this if we can use a pair of ADDIs.
672     if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2))
673       break;
674 
675     RISCVMatInt::InstSeq Seq =
676         RISCVMatInt::generateInstSeq(Offset, Subtarget->getFeatureBits());
677 
678     Offset -= Lo12;
679     // Restore sign bits for RV32.
680     if (!Subtarget->is64Bit())
681       Offset = SignExtend64<32>(Offset);
682 
683     // We can fold if the last operation is an ADDI or its an ADDIW that could
684     // be treated as an ADDI.
685     if (Seq.back().Opc != RISCV::ADDI &&
686         !(Seq.back().Opc == RISCV::ADDIW && isInt<32>(Offset)))
687       break;
688     assert(Seq.back().Imm == Lo12 && "Expected immediate to match Lo12");
689     // Drop the last operation.
690     Seq.pop_back();
691     assert(!Seq.empty() && "Expected more instructions in sequence");
692 
693     bool AllPointerUses = true;
694     for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
695       SDNode *User = *UI;
696 
697       // Is this user a memory instruction that uses a register and immediate
698       // that has this ADD as its pointer.
699       unsigned BaseOpIdx, OffsetOpIdx;
700       if (!User->isMachineOpcode() ||
701           !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) ||
702           UI.getOperandNo() != BaseOpIdx) {
703         AllPointerUses = false;
704         break;
705       }
706 
707       // If the memory instruction already has an offset, make sure the combined
708       // offset is foldable.
709       int64_t MemOffs =
710           cast<ConstantSDNode>(User->getOperand(OffsetOpIdx))->getSExtValue();
711       MemOffs += Lo12;
712       if (!isInt<12>(MemOffs)) {
713         AllPointerUses = false;
714         break;
715       }
716     }
717 
718     if (!AllPointerUses)
719       break;
720 
721     // Emit (ADDI (ADD X, Hi), Lo)
722     SDNode *Imm = selectImmSeq(CurDAG, DL, VT, Seq);
723     SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT,
724                                          Node->getOperand(0), SDValue(Imm, 0));
725     SDNode *ADDI =
726         CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0),
727                                CurDAG->getTargetConstant(Lo12, DL, VT));
728     ReplaceNode(Node, ADDI);
729     return;
730   }
731   case ISD::SRL: {
732     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
733     if (!N1C)
734       break;
735     SDValue N0 = Node->getOperand(0);
736     if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
737         !isa<ConstantSDNode>(N0.getOperand(1)))
738       break;
739     unsigned ShAmt = N1C->getZExtValue();
740     uint64_t Mask = N0.getConstantOperandVal(1);
741 
742     // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
743     // 32 leading zeros and C3 trailing zeros.
744     if (isShiftedMask_64(Mask)) {
745       unsigned XLen = Subtarget->getXLen();
746       unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask));
747       unsigned TrailingZeros = countTrailingZeros(Mask);
748       if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
749         SDNode *SRLIW = CurDAG->getMachineNode(
750             RISCV::SRLIW, DL, VT, N0->getOperand(0),
751             CurDAG->getTargetConstant(TrailingZeros, DL, VT));
752         SDNode *SLLI = CurDAG->getMachineNode(
753             RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
754             CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
755         ReplaceNode(Node, SLLI);
756         return;
757       }
758     }
759 
760     // Optimize (srl (and X, C2), C) ->
761     //          (srli (slli X, (XLen-C3), (XLen-C3) + C)
762     // Where C2 is a mask with C3 trailing ones.
763     // Taking into account that the C2 may have had lower bits unset by
764     // SimplifyDemandedBits. This avoids materializing the C2 immediate.
765     // This pattern occurs when type legalizing right shifts for types with
766     // less than XLen bits.
767     Mask |= maskTrailingOnes<uint64_t>(ShAmt);
768     if (!isMask_64(Mask))
769       break;
770     unsigned TrailingOnes = countTrailingOnes(Mask);
771     // 32 trailing ones should use srliw via tablegen pattern.
772     if (TrailingOnes == 32 || ShAmt >= TrailingOnes)
773       break;
774     unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
775     SDNode *SLLI =
776         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
777                                CurDAG->getTargetConstant(LShAmt, DL, VT));
778     SDNode *SRLI = CurDAG->getMachineNode(
779         RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
780         CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
781     ReplaceNode(Node, SRLI);
782     return;
783   }
784   case ISD::SRA: {
785     // Optimize (sra (sext_inreg X, i16), C) ->
786     //          (srai (slli X, (XLen-16), (XLen-16) + C)
787     // And      (sra (sext_inreg X, i8), C) ->
788     //          (srai (slli X, (XLen-8), (XLen-8) + C)
789     // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
790     // This transform matches the code we get without Zbb. The shifts are more
791     // compressible, and this can help expose CSE opportunities in the sdiv by
792     // constant optimization.
793     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
794     if (!N1C)
795       break;
796     SDValue N0 = Node->getOperand(0);
797     if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
798       break;
799     unsigned ShAmt = N1C->getZExtValue();
800     unsigned ExtSize =
801         cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
802     // ExtSize of 32 should use sraiw via tablegen pattern.
803     if (ExtSize >= 32 || ShAmt >= ExtSize)
804       break;
805     unsigned LShAmt = Subtarget->getXLen() - ExtSize;
806     SDNode *SLLI =
807         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
808                                CurDAG->getTargetConstant(LShAmt, DL, VT));
809     SDNode *SRAI = CurDAG->getMachineNode(
810         RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
811         CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
812     ReplaceNode(Node, SRAI);
813     return;
814   }
815   case ISD::AND: {
816     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
817     if (!N1C)
818       break;
819 
820     SDValue N0 = Node->getOperand(0);
821 
822     bool LeftShift = N0.getOpcode() == ISD::SHL;
823     if (!LeftShift && N0.getOpcode() != ISD::SRL)
824       break;
825 
826     auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
827     if (!C)
828       break;
829     uint64_t C2 = C->getZExtValue();
830     unsigned XLen = Subtarget->getXLen();
831     if (!C2 || C2 >= XLen)
832       break;
833 
834     uint64_t C1 = N1C->getZExtValue();
835 
836     // Keep track of whether this is a c.andi. If we can't use c.andi, the
837     // shift pair might offer more compression opportunities.
838     // TODO: We could check for C extension here, but we don't have many lit
839     // tests with the C extension enabled so not checking gets better coverage.
    // TODO: What if ANDI is faster than the shift pair?
841     bool IsCANDI = isInt<6>(N1C->getSExtValue());
842 
843     // Clear irrelevant bits in the mask.
844     if (LeftShift)
845       C1 &= maskTrailingZeros<uint64_t>(C2);
846     else
847       C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
848 
849     // Some transforms should only be done if the shift has a single use or
850     // the AND would become (srli (slli X, 32), 32)
851     bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
852 
853     SDValue X = N0.getOperand(0);
854 
    // Turn (and (srl x, c2), c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
    // with c3 leading zeros.
857     if (!LeftShift && isMask_64(C1)) {
858       uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
859       if (C2 < C3) {
860         // If the number of leading zeros is C2+32 this can be SRLIW.
861         if (C2 + 32 == C3) {
862           SDNode *SRLIW = CurDAG->getMachineNode(
863               RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
864           ReplaceNode(Node, SRLIW);
865           return;
866         }
867 
868         // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if
869         // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
870         //
871         // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
872         // legalized and goes through DAG combine.
873         if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
874             X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
875             cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
876           SDNode *SRAIW =
877               CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
878                                      CurDAG->getTargetConstant(31, DL, VT));
879           SDNode *SRLIW = CurDAG->getMachineNode(
880               RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
881               CurDAG->getTargetConstant(C3 - 32, DL, VT));
882           ReplaceNode(Node, SRLIW);
883           return;
884         }
885 
886         // (srli (slli x, c3-c2), c3).
887         // Skip if we could use (zext.w (sraiw X, C2)).
888         bool Skip = Subtarget->hasStdExtZba() && C3 == 32 &&
889                     X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
890                     cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
891         // Also Skip if we can use bexti.
892         Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1;
893         if (OneUseOrZExtW && !Skip) {
894           SDNode *SLLI = CurDAG->getMachineNode(
895               RISCV::SLLI, DL, VT, X,
896               CurDAG->getTargetConstant(C3 - C2, DL, VT));
897           SDNode *SRLI =
898               CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
899                                      CurDAG->getTargetConstant(C3, DL, VT));
900           ReplaceNode(Node, SRLI);
901           return;
902         }
903       }
904     }
905 
    // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
    // shifted by c2 bits with c3 leading zeros.
908     if (LeftShift && isShiftedMask_64(C1)) {
909       uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
910 
911       if (C2 + C3 < XLen &&
912           C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
913         // Use slli.uw when possible.
914         if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
915           SDNode *SLLI_UW = CurDAG->getMachineNode(
916               RISCV::SLLI_UW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
917           ReplaceNode(Node, SLLI_UW);
918           return;
919         }
920 
        // (srli (slli x, c2+c3), c3)
922         if (OneUseOrZExtW && !IsCANDI) {
923           SDNode *SLLI = CurDAG->getMachineNode(
924               RISCV::SLLI, DL, VT, X,
925               CurDAG->getTargetConstant(C2 + C3, DL, VT));
926           SDNode *SRLI =
927               CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
928                                      CurDAG->getTargetConstant(C3, DL, VT));
929           ReplaceNode(Node, SRLI);
930           return;
931         }
932       }
933     }
934 
    // Turn (and (srl x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
    // shifted mask with c2 leading zeros and c3 trailing zeros.
937     if (!LeftShift && isShiftedMask_64(C1)) {
938       uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
939       uint64_t C3 = countTrailingZeros(C1);
940       if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) {
941         unsigned SrliOpc = RISCV::SRLI;
942         // If the input is zexti32 we should use SRLIW.
943         if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
944             X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
945           SrliOpc = RISCV::SRLIW;
946           X = X.getOperand(0);
947         }
948         SDNode *SRLI = CurDAG->getMachineNode(
949             SrliOpc, DL, VT, X, CurDAG->getTargetConstant(C2 + C3, DL, VT));
950         SDNode *SLLI =
951             CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
952                                    CurDAG->getTargetConstant(C3, DL, VT));
953         ReplaceNode(Node, SLLI);
954         return;
955       }
956       // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
957       if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
958           OneUseOrZExtW && !IsCANDI) {
959         SDNode *SRLIW =
960             CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X,
961                                    CurDAG->getTargetConstant(C2 + C3, DL, VT));
962         SDNode *SLLI =
963             CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
964                                    CurDAG->getTargetConstant(C3, DL, VT));
965         ReplaceNode(Node, SLLI);
966         return;
967       }
968     }
969 
970     // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
971     // shifted mask with no leading zeros and c3 trailing zeros.
972     if (LeftShift && isShiftedMask_64(C1)) {
973       uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
974       uint64_t C3 = countTrailingZeros(C1);
975       if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) {
976         SDNode *SRLI = CurDAG->getMachineNode(
977             RISCV::SRLI, DL, VT, X, CurDAG->getTargetConstant(C3 - C2, DL, VT));
978         SDNode *SLLI =
979             CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
980                                    CurDAG->getTargetConstant(C3, DL, VT));
981         ReplaceNode(Node, SLLI);
982         return;
983       }
984       // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
985       if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
986         SDNode *SRLIW =
987             CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X,
988                                    CurDAG->getTargetConstant(C3 - C2, DL, VT));
989         SDNode *SLLI =
990             CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
991                                    CurDAG->getTargetConstant(C3, DL, VT));
992         ReplaceNode(Node, SLLI);
993         return;
994       }
995     }
996 
997     break;
998   }
999   case ISD::MUL: {
1000     // Special case for calculating (mul (and X, C2), C1) where the full product
1001     // fits in XLen bits. We can shift X left by the number of leading zeros in
1002     // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1003     // product has XLen trailing zeros, putting it in the output of MULHU. This
1004     // can avoid materializing a constant in a register for C2.
1005 
1006     // RHS should be a constant.
1007     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1008     if (!N1C || !N1C->hasOneUse())
1009       break;
1010 
1011     // LHS should be an AND with constant.
1012     SDValue N0 = Node->getOperand(0);
1013     if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1014       break;
1015 
1016     uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
1017 
1018     // Constant should be a mask.
1019     if (!isMask_64(C2))
1020       break;
1021 
1022     // This should be the only use of the AND unless we will use
1023     // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND
1024     // constants.
1025     if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF))
1026       break;
1027 
1028     // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this
1029     // optimization.
1030     if (isInt<12>(C2) ||
1031         (C2 == UINT64_C(0xFFFF) &&
1032          (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) ||
1033         (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()))
1034       break;
1035 
1036     // We need to shift left the AND input and C1 by a total of XLen bits.
1037 
1038     // How far left do we need to shift the AND input?
1039     unsigned XLen = Subtarget->getXLen();
1040     unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2));
1041 
1042     // The constant gets shifted by the remaining amount unless that would
1043     // shift bits out.
1044     uint64_t C1 = N1C->getZExtValue();
1045     unsigned ConstantShift = XLen - LeadingZeros;
1046     if (ConstantShift > (XLen - (64 - countLeadingZeros(C1))))
1047       break;
1048 
1049     uint64_t ShiftedC1 = C1 << ConstantShift;
1050     // If this RV32, we need to sign extend the constant.
1051     if (XLen == 32)
1052       ShiftedC1 = SignExtend64<32>(ShiftedC1);
1053 
1054     // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1055     SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
1056     SDNode *SLLI =
1057         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1058                                CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1059     SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1060                                            SDValue(SLLI, 0), SDValue(Imm, 0));
1061     ReplaceNode(Node, MULHU);
1062     return;
1063   }
1064   case ISD::INTRINSIC_WO_CHAIN: {
1065     unsigned IntNo = Node->getConstantOperandVal(0);
1066     switch (IntNo) {
1067       // By default we do not custom select any intrinsic.
1068     default:
1069       break;
1070     case Intrinsic::riscv_vmsgeu:
1071     case Intrinsic::riscv_vmsge: {
1072       SDValue Src1 = Node->getOperand(1);
1073       SDValue Src2 = Node->getOperand(2);
1074       bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1075       bool IsCmpUnsignedZero = false;
1076       // Only custom select scalar second operand.
1077       if (Src2.getValueType() != XLenVT)
1078         break;
1079       // Small constants are handled with patterns.
1080       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1081         int64_t CVal = C->getSExtValue();
1082         if (CVal >= -15 && CVal <= 16) {
1083           if (!IsUnsigned || CVal != 0)
1084             break;
1085           IsCmpUnsignedZero = true;
1086         }
1087       }
1088       MVT Src1VT = Src1.getSimpleValueType();
1089       unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1090       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1091       default:
1092         llvm_unreachable("Unexpected LMUL!");
1093 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b)            \
1094   case RISCVII::VLMUL::lmulenum:                                               \
1095     VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
1096                              : RISCV::PseudoVMSLT_VX_##suffix;                 \
1097     VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
1098     VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
1099     break;
1100         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1101         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1102         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1103         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1104         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1105         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1106         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1107 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1108       }
1109       SDValue SEW = CurDAG->getTargetConstant(
1110           Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1111       SDValue VL;
1112       selectVLOp(Node->getOperand(3), VL);
1113 
1114       // If vmsgeu with 0 immediate, expand it to vmset.
1115       if (IsCmpUnsignedZero) {
1116         ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1117         return;
1118       }
1119 
1120       // Expand to
1121       // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1122       SDValue Cmp = SDValue(
1123           CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1124           0);
1125       ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1126                                                {Cmp, Cmp, VL, SEW}));
1127       return;
1128     }
1129     case Intrinsic::riscv_vmsgeu_mask:
1130     case Intrinsic::riscv_vmsge_mask: {
1131       SDValue Src1 = Node->getOperand(2);
1132       SDValue Src2 = Node->getOperand(3);
1133       bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1134       bool IsCmpUnsignedZero = false;
1135       // Only custom select scalar second operand.
1136       if (Src2.getValueType() != XLenVT)
1137         break;
1138       // Small constants are handled with patterns.
1139       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1140         int64_t CVal = C->getSExtValue();
1141         if (CVal >= -15 && CVal <= 16) {
1142           if (!IsUnsigned || CVal != 0)
1143             break;
1144           IsCmpUnsignedZero = true;
1145         }
1146       }
1147       MVT Src1VT = Src1.getSimpleValueType();
1148       unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1149           VMOROpcode;
1150       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1151       default:
1152         llvm_unreachable("Unexpected LMUL!");
1153 #define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b)                         \
1154   case RISCVII::VLMUL::lmulenum:                                               \
1155     VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
1156                              : RISCV::PseudoVMSLT_VX_##suffix;                 \
1157     VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
1158                                  : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
1159     break;
1160         CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1161         CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1162         CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1163         CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1164         CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1165         CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1166         CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1167 #undef CASE_VMSLT_OPCODES
1168       }
1169       // Mask operations use the LMUL from the mask type.
1170       switch (RISCVTargetLowering::getLMUL(VT)) {
1171       default:
1172         llvm_unreachable("Unexpected LMUL!");
1173 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)                       \
1174   case RISCVII::VLMUL::lmulenum:                                               \
1175     VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
1176     VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
1177     VMOROpcode = RISCV::PseudoVMOR_MM_##suffix;                                \
1178     break;
1179         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1180         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1181         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1182         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1183         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1184         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1185         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1186 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1187       }
1188       SDValue SEW = CurDAG->getTargetConstant(
1189           Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1190       SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1191       SDValue VL;
1192       selectVLOp(Node->getOperand(5), VL);
1193       SDValue MaskedOff = Node->getOperand(1);
1194       SDValue Mask = Node->getOperand(4);
1195 
1196       // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1197       if (IsCmpUnsignedZero) {
1198         // We don't need vmor if the MaskedOff and the Mask are the same
1199         // value.
1200         if (Mask == MaskedOff) {
1201           ReplaceUses(Node, Mask.getNode());
1202           return;
1203         }
1204         ReplaceNode(Node,
1205                     CurDAG->getMachineNode(VMOROpcode, DL, VT,
1206                                            {Mask, MaskedOff, VL, MaskSEW}));
1207         return;
1208       }
1209 
1210       // If the MaskedOff value and the Mask are the same value use
1211       // vmslt{u}.vx vt, va, x;  vmandn.mm vd, vd, vt
1212       // This avoids needing to copy v0 to vd before starting the next sequence.
1213       if (Mask == MaskedOff) {
1214         SDValue Cmp = SDValue(
1215             CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1216             0);
1217         ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1218                                                  {Mask, Cmp, VL, MaskSEW}));
1219         return;
1220       }
1221 
1222       // Mask needs to be copied to V0.
1223       SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1224                                            RISCV::V0, Mask, SDValue());
1225       SDValue Glue = Chain.getValue(1);
1226       SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1227 
1228       // Otherwise use
1229       // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1230       // The result is mask undisturbed.
1231       // We use the same instructions to emulate mask agnostic behavior, because
1232       // the agnostic result can be either undisturbed or all 1.
1233       SDValue Cmp = SDValue(
1234           CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1235                                  {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1236           0);
1237       // vmxor.mm vd, vd, v0 is used to update active value.
1238       ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1239                                                {Cmp, Mask, VL, MaskSEW}));
1240       return;
1241     }
1242     case Intrinsic::riscv_vsetvli_opt:
1243     case Intrinsic::riscv_vsetvlimax_opt:
1244       return selectVSETVLI(Node);
1245     }
1246     break;
1247   }
1248   case ISD::INTRINSIC_W_CHAIN: {
1249     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
1250     switch (IntNo) {
1251       // By default we do not custom select any intrinsic.
1252     default:
1253       break;
1254     case Intrinsic::riscv_vsetvli:
1255     case Intrinsic::riscv_vsetvlimax:
1256       return selectVSETVLI(Node);
1257     case Intrinsic::riscv_vlseg2:
1258     case Intrinsic::riscv_vlseg3:
1259     case Intrinsic::riscv_vlseg4:
1260     case Intrinsic::riscv_vlseg5:
1261     case Intrinsic::riscv_vlseg6:
1262     case Intrinsic::riscv_vlseg7:
1263     case Intrinsic::riscv_vlseg8: {
1264       selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1265       return;
1266     }
1267     case Intrinsic::riscv_vlseg2_mask:
1268     case Intrinsic::riscv_vlseg3_mask:
1269     case Intrinsic::riscv_vlseg4_mask:
1270     case Intrinsic::riscv_vlseg5_mask:
1271     case Intrinsic::riscv_vlseg6_mask:
1272     case Intrinsic::riscv_vlseg7_mask:
1273     case Intrinsic::riscv_vlseg8_mask: {
1274       selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1275       return;
1276     }
1277     case Intrinsic::riscv_vlsseg2:
1278     case Intrinsic::riscv_vlsseg3:
1279     case Intrinsic::riscv_vlsseg4:
1280     case Intrinsic::riscv_vlsseg5:
1281     case Intrinsic::riscv_vlsseg6:
1282     case Intrinsic::riscv_vlsseg7:
1283     case Intrinsic::riscv_vlsseg8: {
1284       selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1285       return;
1286     }
1287     case Intrinsic::riscv_vlsseg2_mask:
1288     case Intrinsic::riscv_vlsseg3_mask:
1289     case Intrinsic::riscv_vlsseg4_mask:
1290     case Intrinsic::riscv_vlsseg5_mask:
1291     case Intrinsic::riscv_vlsseg6_mask:
1292     case Intrinsic::riscv_vlsseg7_mask:
1293     case Intrinsic::riscv_vlsseg8_mask: {
1294       selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1295       return;
1296     }
1297     case Intrinsic::riscv_vloxseg2:
1298     case Intrinsic::riscv_vloxseg3:
1299     case Intrinsic::riscv_vloxseg4:
1300     case Intrinsic::riscv_vloxseg5:
1301     case Intrinsic::riscv_vloxseg6:
1302     case Intrinsic::riscv_vloxseg7:
1303     case Intrinsic::riscv_vloxseg8:
1304       selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1305       return;
1306     case Intrinsic::riscv_vluxseg2:
1307     case Intrinsic::riscv_vluxseg3:
1308     case Intrinsic::riscv_vluxseg4:
1309     case Intrinsic::riscv_vluxseg5:
1310     case Intrinsic::riscv_vluxseg6:
1311     case Intrinsic::riscv_vluxseg7:
1312     case Intrinsic::riscv_vluxseg8:
1313       selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1314       return;
1315     case Intrinsic::riscv_vloxseg2_mask:
1316     case Intrinsic::riscv_vloxseg3_mask:
1317     case Intrinsic::riscv_vloxseg4_mask:
1318     case Intrinsic::riscv_vloxseg5_mask:
1319     case Intrinsic::riscv_vloxseg6_mask:
1320     case Intrinsic::riscv_vloxseg7_mask:
1321     case Intrinsic::riscv_vloxseg8_mask:
1322       selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1323       return;
1324     case Intrinsic::riscv_vluxseg2_mask:
1325     case Intrinsic::riscv_vluxseg3_mask:
1326     case Intrinsic::riscv_vluxseg4_mask:
1327     case Intrinsic::riscv_vluxseg5_mask:
1328     case Intrinsic::riscv_vluxseg6_mask:
1329     case Intrinsic::riscv_vluxseg7_mask:
1330     case Intrinsic::riscv_vluxseg8_mask:
1331       selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1332       return;
1333     case Intrinsic::riscv_vlseg8ff:
1334     case Intrinsic::riscv_vlseg7ff:
1335     case Intrinsic::riscv_vlseg6ff:
1336     case Intrinsic::riscv_vlseg5ff:
1337     case Intrinsic::riscv_vlseg4ff:
1338     case Intrinsic::riscv_vlseg3ff:
1339     case Intrinsic::riscv_vlseg2ff: {
1340       selectVLSEGFF(Node, /*IsMasked*/ false);
1341       return;
1342     }
1343     case Intrinsic::riscv_vlseg8ff_mask:
1344     case Intrinsic::riscv_vlseg7ff_mask:
1345     case Intrinsic::riscv_vlseg6ff_mask:
1346     case Intrinsic::riscv_vlseg5ff_mask:
1347     case Intrinsic::riscv_vlseg4ff_mask:
1348     case Intrinsic::riscv_vlseg3ff_mask:
1349     case Intrinsic::riscv_vlseg2ff_mask: {
1350       selectVLSEGFF(Node, /*IsMasked*/ true);
1351       return;
1352     }
1353     case Intrinsic::riscv_vloxei:
1354     case Intrinsic::riscv_vloxei_mask:
1355     case Intrinsic::riscv_vluxei:
1356     case Intrinsic::riscv_vluxei_mask: {
1357       bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1358                       IntNo == Intrinsic::riscv_vluxei_mask;
1359       bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1360                        IntNo == Intrinsic::riscv_vloxei_mask;
1361 
1362       MVT VT = Node->getSimpleValueType(0);
1363       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1364 
1365       unsigned CurOp = 2;
      // Masked intrinsics only have TU version pseudo instructions.
1367       bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
1368       SmallVector<SDValue, 8> Operands;
1369       if (IsTU)
1370         Operands.push_back(Node->getOperand(CurOp++));
1371       else
1372         // Skip the undef passthru operand for nomask TA version pseudo
1373         CurOp++;
1374 
1375       MVT IndexVT;
1376       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1377                                  /*IsStridedOrIndexed*/ true, Operands,
1378                                  /*IsLoad=*/true, &IndexVT);
1379 
1380       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1381              "Element count mismatch");
1382 
1383       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1384       RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1385       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1386       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1387         report_fatal_error("The V extension does not support EEW=64 for index "
1388                            "values when XLEN=32");
1389       }
1390       const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1391           IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1392           static_cast<unsigned>(IndexLMUL));
1393       MachineSDNode *Load =
1394           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1395 
1396       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1397         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1398 
1399       ReplaceNode(Node, Load);
1400       return;
1401     }
1402     case Intrinsic::riscv_vlm:
1403     case Intrinsic::riscv_vle:
1404     case Intrinsic::riscv_vle_mask:
1405     case Intrinsic::riscv_vlse:
1406     case Intrinsic::riscv_vlse_mask: {
1407       bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1408                       IntNo == Intrinsic::riscv_vlse_mask;
1409       bool IsStrided =
1410           IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1411 
1412       MVT VT = Node->getSimpleValueType(0);
1413       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1414 
1415       unsigned CurOp = 2;
      // The riscv_vlm intrinsic is always tail agnostic and has no passthru
      // operand.
1417       bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
      // Masked intrinsics only have TU version pseudo instructions.
1419       bool IsTU =
1420           HasPassthruOperand &&
1421           ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked);
1422       SmallVector<SDValue, 8> Operands;
1423       if (IsTU)
1424         Operands.push_back(Node->getOperand(CurOp++));
1425       else if (HasPassthruOperand)
1426         // Skip the undef passthru operand for nomask TA version pseudo
1427         CurOp++;
1428 
1429       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1430                                  Operands, /*IsLoad=*/true);
1431 
1432       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1433       const RISCV::VLEPseudo *P =
1434           RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
1435                               static_cast<unsigned>(LMUL));
1436       MachineSDNode *Load =
1437           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1438 
1439       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1440         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1441 
1442       ReplaceNode(Node, Load);
1443       return;
1444     }
1445     case Intrinsic::riscv_vleff:
1446     case Intrinsic::riscv_vleff_mask: {
1447       bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1448 
1449       MVT VT = Node->getSimpleValueType(0);
1450       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1451 
1452       unsigned CurOp = 2;
      // Masked intrinsics only have TU version pseudo instructions.
1454       bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
1455       SmallVector<SDValue, 7> Operands;
1456       if (IsTU)
1457         Operands.push_back(Node->getOperand(CurOp++));
1458       else
1459         // Skip the undef passthru operand for nomask TA version pseudo
1460         CurOp++;
1461 
1462       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1463                                  /*IsStridedOrIndexed*/ false, Operands,
1464                                  /*IsLoad=*/true);
1465 
1466       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1467       const RISCV::VLEPseudo *P =
1468           RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
1469                               Log2SEW, static_cast<unsigned>(LMUL));
1470       MachineSDNode *Load = CurDAG->getMachineNode(
1471           P->Pseudo, DL, Node->getVTList(), Operands);
1472       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1473         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1474 
1475       ReplaceNode(Node, Load);
1476       return;
1477     }
1478     }
1479     break;
1480   }
1481   case ISD::INTRINSIC_VOID: {
1482     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
1483     switch (IntNo) {
1484     case Intrinsic::riscv_vsseg2:
1485     case Intrinsic::riscv_vsseg3:
1486     case Intrinsic::riscv_vsseg4:
1487     case Intrinsic::riscv_vsseg5:
1488     case Intrinsic::riscv_vsseg6:
1489     case Intrinsic::riscv_vsseg7:
1490     case Intrinsic::riscv_vsseg8: {
1491       selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1492       return;
1493     }
1494     case Intrinsic::riscv_vsseg2_mask:
1495     case Intrinsic::riscv_vsseg3_mask:
1496     case Intrinsic::riscv_vsseg4_mask:
1497     case Intrinsic::riscv_vsseg5_mask:
1498     case Intrinsic::riscv_vsseg6_mask:
1499     case Intrinsic::riscv_vsseg7_mask:
1500     case Intrinsic::riscv_vsseg8_mask: {
1501       selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1502       return;
1503     }
1504     case Intrinsic::riscv_vssseg2:
1505     case Intrinsic::riscv_vssseg3:
1506     case Intrinsic::riscv_vssseg4:
1507     case Intrinsic::riscv_vssseg5:
1508     case Intrinsic::riscv_vssseg6:
1509     case Intrinsic::riscv_vssseg7:
1510     case Intrinsic::riscv_vssseg8: {
1511       selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1512       return;
1513     }
1514     case Intrinsic::riscv_vssseg2_mask:
1515     case Intrinsic::riscv_vssseg3_mask:
1516     case Intrinsic::riscv_vssseg4_mask:
1517     case Intrinsic::riscv_vssseg5_mask:
1518     case Intrinsic::riscv_vssseg6_mask:
1519     case Intrinsic::riscv_vssseg7_mask:
1520     case Intrinsic::riscv_vssseg8_mask: {
1521       selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1522       return;
1523     }
1524     case Intrinsic::riscv_vsoxseg2:
1525     case Intrinsic::riscv_vsoxseg3:
1526     case Intrinsic::riscv_vsoxseg4:
1527     case Intrinsic::riscv_vsoxseg5:
1528     case Intrinsic::riscv_vsoxseg6:
1529     case Intrinsic::riscv_vsoxseg7:
1530     case Intrinsic::riscv_vsoxseg8:
1531       selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1532       return;
1533     case Intrinsic::riscv_vsuxseg2:
1534     case Intrinsic::riscv_vsuxseg3:
1535     case Intrinsic::riscv_vsuxseg4:
1536     case Intrinsic::riscv_vsuxseg5:
1537     case Intrinsic::riscv_vsuxseg6:
1538     case Intrinsic::riscv_vsuxseg7:
1539     case Intrinsic::riscv_vsuxseg8:
1540       selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1541       return;
1542     case Intrinsic::riscv_vsoxseg2_mask:
1543     case Intrinsic::riscv_vsoxseg3_mask:
1544     case Intrinsic::riscv_vsoxseg4_mask:
1545     case Intrinsic::riscv_vsoxseg5_mask:
1546     case Intrinsic::riscv_vsoxseg6_mask:
1547     case Intrinsic::riscv_vsoxseg7_mask:
1548     case Intrinsic::riscv_vsoxseg8_mask:
1549       selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1550       return;
1551     case Intrinsic::riscv_vsuxseg2_mask:
1552     case Intrinsic::riscv_vsuxseg3_mask:
1553     case Intrinsic::riscv_vsuxseg4_mask:
1554     case Intrinsic::riscv_vsuxseg5_mask:
1555     case Intrinsic::riscv_vsuxseg6_mask:
1556     case Intrinsic::riscv_vsuxseg7_mask:
1557     case Intrinsic::riscv_vsuxseg8_mask:
1558       selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1559       return;
1560     case Intrinsic::riscv_vsoxei:
1561     case Intrinsic::riscv_vsoxei_mask:
1562     case Intrinsic::riscv_vsuxei:
1563     case Intrinsic::riscv_vsuxei_mask: {
1564       bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
1565                       IntNo == Intrinsic::riscv_vsuxei_mask;
1566       bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
1567                        IntNo == Intrinsic::riscv_vsoxei_mask;
1568 
1569       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1570       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1571 
1572       unsigned CurOp = 2;
1573       SmallVector<SDValue, 8> Operands;
1574       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1575 
1576       MVT IndexVT;
1577       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1578                                  /*IsStridedOrIndexed*/ true, Operands,
1579                                  /*IsLoad=*/false, &IndexVT);
1580 
1581       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1582              "Element count mismatch");
1583 
1584       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1585       RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1586       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1587       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1588         report_fatal_error("The V extension does not support EEW=64 for index "
1589                            "values when XLEN=32");
1590       }
1591       const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
1592           IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW,
1593           static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
1594       MachineSDNode *Store =
1595           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1596 
1597       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1598         CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1599 
1600       ReplaceNode(Node, Store);
1601       return;
1602     }
1603     case Intrinsic::riscv_vsm:
1604     case Intrinsic::riscv_vse:
1605     case Intrinsic::riscv_vse_mask:
1606     case Intrinsic::riscv_vsse:
1607     case Intrinsic::riscv_vsse_mask: {
1608       bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
1609                       IntNo == Intrinsic::riscv_vsse_mask;
1610       bool IsStrided =
1611           IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
1612 
1613       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1614       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1615 
1616       unsigned CurOp = 2;
1617       SmallVector<SDValue, 8> Operands;
1618       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1619 
1620       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1621                                  Operands);
1622 
1623       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1624       const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
1625           IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
1626       MachineSDNode *Store =
1627           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1628       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1629         CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1630 
1631       ReplaceNode(Node, Store);
1632       return;
1633     }
1634     }
1635     break;
1636   }
1637   case ISD::BITCAST: {
1638     MVT SrcVT = Node->getOperand(0).getSimpleValueType();
1639     // Just drop bitcasts between vectors if both are fixed or both are
1640     // scalable.
1641     if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
1642         (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
1643       ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
1644       CurDAG->RemoveDeadNode(Node);
1645       return;
1646     }
1647     break;
1648   }
1649   case ISD::INSERT_SUBVECTOR: {
1650     SDValue V = Node->getOperand(0);
1651     SDValue SubV = Node->getOperand(1);
1652     SDLoc DL(SubV);
1653     auto Idx = Node->getConstantOperandVal(2);
1654     MVT SubVecVT = SubV.getSimpleValueType();
1655 
1656     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
1657     MVT SubVecContainerVT = SubVecVT;
1658     // Establish the correct scalable-vector types for any fixed-length type.
1659     if (SubVecVT.isFixedLengthVector())
1660       SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
1661     if (VT.isFixedLengthVector())
1662       VT = TLI.getContainerForFixedLengthVector(VT);
1663 
1664     const auto *TRI = Subtarget->getRegisterInfo();
1665     unsigned SubRegIdx;
1666     std::tie(SubRegIdx, Idx) =
1667         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1668             VT, SubVecContainerVT, Idx, TRI);
1669 
1670     // If the Idx hasn't been completely eliminated then this is a subvector
1671     // insert which doesn't naturally align to a vector register. These must
1672     // be handled using instructions to manipulate the vector registers.
1673     if (Idx != 0)
1674       break;
1675 
1676     RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
1677     bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
1678                            SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
1679                            SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
1680     (void)IsSubVecPartReg; // Silence unused variable warning without asserts.
1681     assert((!IsSubVecPartReg || V.isUndef()) &&
1682            "Expecting lowering to have created legal INSERT_SUBVECTORs when "
1683            "the subvector is smaller than a full-sized register");
1684 
1685     // If we haven't set a SubRegIdx, then we must be going between
1686     // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
1687     if (SubRegIdx == RISCV::NoSubRegister) {
1688       unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT);
1689       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
1690                  InRegClassID &&
1691              "Unexpected subvector extraction");
1692       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
1693       SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
1694                                                DL, VT, SubV, RC);
1695       ReplaceNode(Node, NewNode);
1696       return;
1697     }
1698 
1699     SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
1700     ReplaceNode(Node, Insert.getNode());
1701     return;
1702   }
1703   case ISD::EXTRACT_SUBVECTOR: {
1704     SDValue V = Node->getOperand(0);
1705     auto Idx = Node->getConstantOperandVal(1);
1706     MVT InVT = V.getSimpleValueType();
1707     SDLoc DL(V);
1708 
1709     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
1710     MVT SubVecContainerVT = VT;
1711     // Establish the correct scalable-vector types for any fixed-length type.
1712     if (VT.isFixedLengthVector())
1713       SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
1714     if (InVT.isFixedLengthVector())
1715       InVT = TLI.getContainerForFixedLengthVector(InVT);
1716 
1717     const auto *TRI = Subtarget->getRegisterInfo();
1718     unsigned SubRegIdx;
1719     std::tie(SubRegIdx, Idx) =
1720         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1721             InVT, SubVecContainerVT, Idx, TRI);
1722 
1723     // If the Idx hasn't been completely eliminated then this is a subvector
1724     // extract which doesn't naturally align to a vector register. These must
1725     // be handled using instructions to manipulate the vector registers.
1726     if (Idx != 0)
1727       break;
1728 
1729     // If we haven't set a SubRegIdx, then we must be going between
1730     // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
1731     if (SubRegIdx == RISCV::NoSubRegister) {
1732       unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
1733       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
1734                  InRegClassID &&
1735              "Unexpected subvector extraction");
1736       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
1737       SDNode *NewNode =
1738           CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
1739       ReplaceNode(Node, NewNode);
1740       return;
1741     }
1742 
1743     SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
1744     ReplaceNode(Node, Extract.getNode());
1745     return;
1746   }
1747   case ISD::SPLAT_VECTOR:
1748   case RISCVISD::VMV_S_X_VL:
1749   case RISCVISD::VFMV_S_F_VL:
1750   case RISCVISD::VMV_V_X_VL:
1751   case RISCVISD::VFMV_V_F_VL: {
1752     // Try to match splat of a scalar load to a strided load with stride of x0.
1753     bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
1754                         Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
1755     bool HasPassthruOperand = Node->getOpcode() != ISD::SPLAT_VECTOR;
1756     if (HasPassthruOperand && !Node->getOperand(0).isUndef())
1757       break;
1758     SDValue Src = HasPassthruOperand ? Node->getOperand(1) : Node->getOperand(0);
1759     auto *Ld = dyn_cast<LoadSDNode>(Src);
1760     if (!Ld)
1761       break;
1762     EVT MemVT = Ld->getMemoryVT();
1763     // The memory VT should be the same size as the element type.
1764     if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
1765       break;
1766     if (!IsProfitableToFold(Src, Node, Node) ||
1767         !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
1768       break;
1769 
1770     SDValue VL;
1771     if (Node->getOpcode() == ISD::SPLAT_VECTOR)
1772       VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
1773     else if (IsScalarMove) {
1774       // We could deal with more VL if we update the VSETVLI insert pass to
1775       // avoid introducing more VSETVLI.
1776       if (!isOneConstant(Node->getOperand(2)))
1777         break;
1778       selectVLOp(Node->getOperand(2), VL);
1779     } else
1780       selectVLOp(Node->getOperand(2), VL);
1781 
1782     unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1783     SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
1784 
1785     SDValue Operands[] = {Ld->getBasePtr(),
1786                           CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW,
1787                           Ld->getChain()};
1788 
1789     RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1790     const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
1791         /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false,
1792         Log2SEW, static_cast<unsigned>(LMUL));
1793     MachineSDNode *Load =
1794         CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1795 
1796     CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
1797 
1798     ReplaceNode(Node, Load);
1799     return;
1800   }
1801   }
1802 
1803   // Select the default instruction.
1804   SelectCode(Node);
1805 }
1806 
1807 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
1808     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
1809   switch (ConstraintID) {
1810   case InlineAsm::Constraint_m:
1811     // We just support simple memory operands that have a single address
1812     // operand and need no special handling.
1813     OutOps.push_back(Op);
1814     return false;
1815   case InlineAsm::Constraint_A:
1816     OutOps.push_back(Op);
1817     return false;
1818   default:
1819     break;
1820   }
1821 
1822   return true;
1823 }
1824 
1825 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
1826                                              SDValue &Offset) {
1827   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
1828     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
1829     Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
1830     return true;
1831   }
1832 
1833   return false;
1834 }
1835 
1836 // Select a frame index and an optional immediate offset from an ADD or OR.
1837 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
1838                                               SDValue &Offset) {
1839   if (SelectAddrFrameIndex(Addr, Base, Offset))
1840     return true;
1841 
1842   if (!CurDAG->isBaseWithConstantOffset(Addr))
1843     return false;
1844 
1845   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
1846     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1847     if (isInt<12>(CVal)) {
1848       Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
1849                                          Subtarget->getXLenVT());
1850       Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
1851                                          Subtarget->getXLenVT());
1852       return true;
1853     }
1854   }
1855 
1856   return false;
1857 }
1858 
1859 bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) {
1860   // If this is FrameIndex, select it directly. Otherwise just let it get
1861   // selected to a register independently.
1862   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr))
1863     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
1864   else
1865     Base = Addr;
1866   return true;
1867 }
1868 
1869 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
1870                                          SDValue &Offset) {
1871   if (SelectAddrFrameIndex(Addr, Base, Offset))
1872     return true;
1873 
1874   SDLoc DL(Addr);
1875   MVT VT = Addr.getSimpleValueType();
1876 
1877   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1878     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1879     if (isInt<12>(CVal)) {
1880       Base = Addr.getOperand(0);
1881       if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
1882         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
1883       Offset = CurDAG->getTargetConstant(CVal, DL, VT);
1884       return true;
1885     }
1886   }
1887 
1888   // Handle ADD with large immediates.
1889   if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
1890     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1891     assert(!isInt<12>(CVal) && "simm12 not already handled?");
1892 
1893     if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
1894       // We can use an ADDI for part of the offset and fold the rest into the
1895       // load/store. This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
1896       int64_t Adj = CVal < 0 ? -2048 : 2047;
1897       Base = SDValue(
1898           CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
1899                                  CurDAG->getTargetConstant(Adj, DL, VT)),
1900           0);
1901       Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
1902       return true;
1903     }
1904   }
1905 
1906   Base = Addr;
1907   Offset = CurDAG->getTargetConstant(0, DL, VT);
1908   return true;
1909 }
1910 
1911 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
1912                                         SDValue &ShAmt) {
1913   // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift
1914   // amount. If there is an AND on the shift amount, we can bypass it if it
1915   // doesn't affect any of those bits.
1916   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
1917     const APInt &AndMask = N->getConstantOperandAPInt(1);
1918 
1919     // Since the max shift amount is a power of 2 we can subtract 1 to make a
1920     // mask that covers the bits needed to represent all shift amounts.
1921     assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
1922     APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
1923 
1924     if (ShMask.isSubsetOf(AndMask)) {
1925       ShAmt = N.getOperand(0);
1926       return true;
1927     }
1928 
1929     // SimplifyDemandedBits may have optimized the mask so try restoring any
1930     // bits that are known zero.
1931     KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0));
1932     if (ShMask.isSubsetOf(AndMask | Known.Zero)) {
1933       ShAmt = N.getOperand(0);
1934       return true;
1935     }
1936   } else if (N.getOpcode() == ISD::SUB &&
1937              isa<ConstantSDNode>(N.getOperand(0))) {
1938     uint64_t Imm = N.getConstantOperandVal(0);
1939     // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
1940     // generate a NEG instead of a SUB of a constant.
1941     if (Imm != 0 && Imm % ShiftWidth == 0) {
1942       SDLoc DL(N);
1943       EVT VT = N.getValueType();
1944       SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
1945       unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
1946       MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
1947                                                   N.getOperand(1));
1948       ShAmt = SDValue(Neg, 0);
1949       return true;
1950     }
1951   }
1952 
1953   ShAmt = N;
1954   return true;
1955 }
1956 
1957 bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) {
1958   if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1959       cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
1960     Val = N.getOperand(0);
1961     return true;
1962   }
1963   MVT VT = N.getSimpleValueType();
1964   if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) {
1965     Val = N;
1966     return true;
1967   }
1968 
1969   return false;
1970 }
1971 
1972 bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
1973   if (N.getOpcode() == ISD::AND) {
1974     auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
1975     if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) {
1976       Val = N.getOperand(0);
1977       return true;
1978     }
1979   }
1980   MVT VT = N.getSimpleValueType();
1981   APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32);
1982   if (CurDAG->MaskedValueIsZero(N, Mask)) {
1983     Val = N;
1984     return true;
1985   }
1986 
1987   return false;
1988 }
1989 
1990 // Return true if all users of this SDNode* only consume the lower \p Bits.
1991 // This can be used to form W instructions for add/sub/mul/shl even when the
1992 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
1993 // SimplifyDemandedBits has made it so some users see a sext_inreg and some
1994 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
1995 // the add/sub/mul/shl to become non-W instructions. By checking the users we
1996 // may be able to use a W instruction and CSE with the other instruction if
1997 // this has happened. We could try to detect that the CSE opportunity exists
1998 // before doing this, but that would be more complicated.
1999 // TODO: Does this need to look through AND/OR/XOR to their users to find more
2000 // opportunities.
2001 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
2002   assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
2003           Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
2004           Node->getOpcode() == ISD::SRL ||
2005           Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
2006           Node->getOpcode() == RISCVISD::GREV ||
2007           Node->getOpcode() == RISCVISD::GORC ||
2008           isa<ConstantSDNode>(Node)) &&
2009          "Unexpected opcode");
2010 
2011   for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
2012     SDNode *User = *UI;
2013     // Users of this node should have already been instruction selected
2014     if (!User->isMachineOpcode())
2015       return false;
2016 
2017     // TODO: Add more opcodes?
2018     switch (User->getMachineOpcode()) {
2019     default:
2020       return false;
2021     case RISCV::ADDW:
2022     case RISCV::ADDIW:
2023     case RISCV::SUBW:
2024     case RISCV::MULW:
2025     case RISCV::SLLW:
2026     case RISCV::SLLIW:
2027     case RISCV::SRAW:
2028     case RISCV::SRAIW:
2029     case RISCV::SRLW:
2030     case RISCV::SRLIW:
2031     case RISCV::DIVW:
2032     case RISCV::DIVUW:
2033     case RISCV::REMW:
2034     case RISCV::REMUW:
2035     case RISCV::ROLW:
2036     case RISCV::RORW:
2037     case RISCV::RORIW:
2038     case RISCV::CLZW:
2039     case RISCV::CTZW:
2040     case RISCV::CPOPW:
2041     case RISCV::SLLI_UW:
2042     case RISCV::FMV_W_X:
2043     case RISCV::FCVT_H_W:
2044     case RISCV::FCVT_H_WU:
2045     case RISCV::FCVT_S_W:
2046     case RISCV::FCVT_S_WU:
2047     case RISCV::FCVT_D_W:
2048     case RISCV::FCVT_D_WU:
2049       if (Bits < 32)
2050         return false;
2051       break;
2052     case RISCV::SLLI:
2053       // SLLI only uses the lower (XLen - ShAmt) bits.
2054       if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
2055         return false;
2056       break;
2057     case RISCV::ANDI:
2058       if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
2059         return false;
2060       break;
2061     case RISCV::SEXT_B:
2062       if (Bits < 8)
2063         return false;
2064       break;
2065     case RISCV::SEXT_H:
2066     case RISCV::FMV_H_X:
2067     case RISCV::ZEXT_H_RV32:
2068     case RISCV::ZEXT_H_RV64:
2069       if (Bits < 16)
2070         return false;
2071       break;
2072     case RISCV::ADD_UW:
2073     case RISCV::SH1ADD_UW:
2074     case RISCV::SH2ADD_UW:
2075     case RISCV::SH3ADD_UW:
2076       // The first operand to add.uw/shXadd.uw is implicitly zero extended from
2077       // 32 bits.
2078       if (UI.getOperandNo() != 0 || Bits < 32)
2079         return false;
2080       break;
2081     case RISCV::SB:
2082       if (UI.getOperandNo() != 0 || Bits < 8)
2083         return false;
2084       break;
2085     case RISCV::SH:
2086       if (UI.getOperandNo() != 0 || Bits < 16)
2087         return false;
2088       break;
2089     case RISCV::SW:
2090       if (UI.getOperandNo() != 0 || Bits < 32)
2091         return false;
2092       break;
2093     }
2094   }
2095 
2096   return true;
2097 }
2098 
2099 // Select VL as a 5 bit immediate or a value that will become a register. This
2100 // allows us to choose betwen VSETIVLI or VSETVLI later.
2101 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
2102   auto *C = dyn_cast<ConstantSDNode>(N);
2103   if (C && isUInt<5>(C->getZExtValue())) {
2104     VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
2105                                    N->getValueType(0));
2106   } else if (C && C->isAllOnesValue()) {
2107     // Treat all ones as VLMax.
2108     VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
2109                                    N->getValueType(0));
2110   } else if (isa<RegisterSDNode>(N) &&
2111              cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
2112     // All our VL operands use an operand that allows GPRNoX0 or an immediate
2113     // as the register class. Convert X0 to a special immediate to pass the
2114     // MachineVerifier. This is recognized specially by the vsetvli insertion
2115     // pass.
2116     VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
2117                                    N->getValueType(0));
2118   } else {
2119     VL = N;
2120   }
2121 
2122   return true;
2123 }
2124 
2125 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
2126   if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
2127     return false;
2128   SplatVal = N.getOperand(1);
2129   return true;
2130 }
2131 
2132 using ValidateFn = bool (*)(int64_t);
2133 
2134 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
2135                                    SelectionDAG &DAG,
2136                                    const RISCVSubtarget &Subtarget,
2137                                    ValidateFn ValidateImm) {
2138   if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
2139       !isa<ConstantSDNode>(N.getOperand(1)))
2140     return false;
2141 
2142   int64_t SplatImm =
2143       cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
2144 
2145   // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
2146   // type is wider than the resulting vector element type: an implicit
2147   // truncation first takes place. Therefore, perform a manual
2148   // truncation/sign-extension in order to ignore any truncated bits and catch
2149   // any zero-extended immediate.
2150   // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
2151   // sign-extending to (XLenVT -1).
2152   MVT XLenVT = Subtarget.getXLenVT();
2153   assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
2154          "Unexpected splat operand type");
2155   MVT EltVT = N.getSimpleValueType().getVectorElementType();
2156   if (EltVT.bitsLT(XLenVT))
2157     SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());
2158 
2159   if (!ValidateImm(SplatImm))
2160     return false;
2161 
2162   SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
2163   return true;
2164 }
2165 
2166 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
2167   return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
2168                                 [](int64_t Imm) { return isInt<5>(Imm); });
2169 }
2170 
2171 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
2172   return selectVSplatSimmHelper(
2173       N, SplatVal, *CurDAG, *Subtarget,
2174       [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
2175 }
2176 
2177 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
2178                                                       SDValue &SplatVal) {
2179   return selectVSplatSimmHelper(
2180       N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
2181         return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
2182       });
2183 }
2184 
2185 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
2186   if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
2187       !isa<ConstantSDNode>(N.getOperand(1)))
2188     return false;
2189 
2190   int64_t SplatImm =
2191       cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
2192 
2193   if (!isUInt<5>(SplatImm))
2194     return false;
2195 
2196   SplatVal =
2197       CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());
2198 
2199   return true;
2200 }
2201 
2202 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
2203                                        SDValue &Imm) {
2204   if (auto *C = dyn_cast<ConstantSDNode>(N)) {
2205     int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
2206 
2207     if (!isInt<5>(ImmVal))
2208       return false;
2209 
2210     Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
2211     return true;
2212   }
2213 
2214   return false;
2215 }
2216 
2217 // Merge an ADDI into the offset of a load/store instruction where possible.
2218 // (load (addi base, off1), off2) -> (load base, off1+off2)
2219 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
2220 // (load (add base, (addi src, off1)), off2)
2221 //    -> (load (add base, src), off1+off2)
2222 // (store val, (add base, (addi src, off1)), off2)
2223 //    -> (store val, (add base, src), off1+off2)
2224 // This is possible when off1+off2 fits a 12-bit immediate.
2225 bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
2226   unsigned OffsetOpIdx, BaseOpIdx;
2227   if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx))
2228     return false;
2229 
2230   if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
2231     return false;
2232 
2233   SDValue Base = N->getOperand(BaseOpIdx);
2234 
2235   if (!Base.isMachineOpcode())
2236     return false;
2237 
2238   if (Base.getMachineOpcode() == RISCV::ADDI) {
2239     // If the base is an ADDI, we can merge it in to the load/store.
2240   } else if (Base.getMachineOpcode() == RISCV::ADDIW &&
2241              isa<ConstantSDNode>(Base.getOperand(1)) &&
2242              Base.getOperand(0).isMachineOpcode() &&
2243              Base.getOperand(0).getMachineOpcode() == RISCV::LUI &&
2244              isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) {
2245     // ADDIW can be merged if it's part of LUI+ADDIW constant materialization
2246     // and LUI+ADDI would have produced the same result. This is true for all
2247     // simm32 values except 0x7ffff800-0x7fffffff.
2248     int64_t Offset =
2249       SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12);
2250     Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue();
2251     if (!isInt<32>(Offset))
2252       return false;
2253   } else
2254    return false;
2255 
2256   SDValue ImmOperand = Base.getOperand(1);
2257   uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
2258 
2259   if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
2260     int64_t Offset1 = Const->getSExtValue();
2261     int64_t CombinedOffset = Offset1 + Offset2;
2262     if (!isInt<12>(CombinedOffset))
2263       return false;
2264     ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
2265                                            ImmOperand.getValueType());
2266   } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
2267     // If the off1 in (addi base, off1) is a global variable's address (its
2268     // low part, really), then we can rely on the alignment of that variable
2269     // to provide a margin of safety before off1 can overflow the 12 bits.
2270     // Check if off2 falls within that margin; if so off1+off2 can't overflow.
2271     const DataLayout &DL = CurDAG->getDataLayout();
2272     Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
2273     if (Offset2 != 0 && Alignment <= Offset2)
2274       return false;
2275     int64_t Offset1 = GA->getOffset();
2276     int64_t CombinedOffset = Offset1 + Offset2;
2277     ImmOperand = CurDAG->getTargetGlobalAddress(
2278         GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
2279         CombinedOffset, GA->getTargetFlags());
2280   } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
2281     // Ditto.
2282     Align Alignment = CP->getAlign();
2283     if (Offset2 != 0 && Alignment <= Offset2)
2284       return false;
2285     int64_t Offset1 = CP->getOffset();
2286     int64_t CombinedOffset = Offset1 + Offset2;
2287     ImmOperand = CurDAG->getTargetConstantPool(
2288         CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
2289         CombinedOffset, CP->getTargetFlags());
2290   } else {
2291     return false;
2292   }
2293 
2294   LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
2295   LLVM_DEBUG(Base->dump(CurDAG));
2296   LLVM_DEBUG(dbgs() << "\nN: ");
2297   LLVM_DEBUG(N->dump(CurDAG));
2298   LLVM_DEBUG(dbgs() << "\n");
2299 
2300   // Modify the offset operand of the load/store.
2301   if (BaseOpIdx == 0) { // Load
2302     N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
2303                                    N->getOperand(2));
2304   } else { // Store
2305     N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
2306                                    ImmOperand, N->getOperand(3));
2307   }
2308 
2309   return true;
2310 }
2311 
2312 // Try to remove sext.w if the input is a W instruction or can be made into
2313 // a W instruction cheaply.
2314 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
2315   // Look for the sext.w pattern, addiw rd, rs1, 0.
2316   if (N->getMachineOpcode() != RISCV::ADDIW ||
2317       !isNullConstant(N->getOperand(1)))
2318     return false;
2319 
2320   SDValue N0 = N->getOperand(0);
2321   if (!N0.isMachineOpcode())
2322     return false;
2323 
2324   switch (N0.getMachineOpcode()) {
2325   default:
2326     break;
2327   case RISCV::ADD:
2328   case RISCV::ADDI:
2329   case RISCV::SUB:
2330   case RISCV::MUL:
2331   case RISCV::SLLI: {
2332     // Convert sext.w+add/sub/mul to their W instructions. This will create
2333     // a new independent instruction. This improves latency.
2334     unsigned Opc;
2335     switch (N0.getMachineOpcode()) {
2336     default:
2337       llvm_unreachable("Unexpected opcode!");
2338     case RISCV::ADD:  Opc = RISCV::ADDW;  break;
2339     case RISCV::ADDI: Opc = RISCV::ADDIW; break;
2340     case RISCV::SUB:  Opc = RISCV::SUBW;  break;
2341     case RISCV::MUL:  Opc = RISCV::MULW;  break;
2342     case RISCV::SLLI: Opc = RISCV::SLLIW; break;
2343     }
2344 
2345     SDValue N00 = N0.getOperand(0);
2346     SDValue N01 = N0.getOperand(1);
2347 
2348     // Shift amount needs to be uimm5.
2349     if (N0.getMachineOpcode() == RISCV::SLLI &&
2350         !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
2351       break;
2352 
2353     SDNode *Result =
2354         CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
2355                                N00, N01);
2356     ReplaceUses(N, Result);
2357     return true;
2358   }
2359   case RISCV::ADDW:
2360   case RISCV::ADDIW:
2361   case RISCV::SUBW:
2362   case RISCV::MULW:
2363   case RISCV::SLLIW:
2364   case RISCV::GREVIW:
2365   case RISCV::GORCIW:
2366     // Result is already sign extended just remove the sext.w.
2367     // NOTE: We only handle the nodes that are selected with hasAllWUsers.
2368     ReplaceUses(N, N0.getNode());
2369     return true;
2370   }
2371 
2372   return false;
2373 }
2374 
2375 // Optimize masked RVV pseudo instructions with a known all-ones mask to their
2376 // corresponding "unmasked" pseudo versions. The mask we're interested in will
2377 // take the form of a V0 physical register operand, with a glued
2378 // register-setting instruction.
2379 bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
2380   const RISCV::RISCVMaskedPseudoInfo *I =
2381       RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
2382   if (!I)
2383     return false;
2384 
2385   unsigned MaskOpIdx = I->MaskOpIdx;
2386 
2387   // Check that we're using V0 as a mask register.
2388   if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
2389       cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
2390     return false;
2391 
2392   // The glued user defines V0.
2393   const auto *Glued = N->getGluedNode();
2394 
2395   if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
2396     return false;
2397 
2398   // Check that we're defining V0 as a mask register.
2399   if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
2400       cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
2401     return false;
2402 
2403   // Check the instruction defining V0; it needs to be a VMSET pseudo.
2404   SDValue MaskSetter = Glued->getOperand(2);
2405 
2406   const auto IsVMSet = [](unsigned Opc) {
2407     return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
2408            Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
2409            Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
2410            Opc == RISCV::PseudoVMSET_M_B8;
2411   };
2412 
2413   // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
2414   // undefined behaviour if it's the wrong bitwidth, so we could choose to
2415   // assume that it's all-ones? Same applies to its VL.
2416   if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode()))
2417     return false;
2418 
2419   // Retrieve the tail policy operand index, if any.
2420   Optional<unsigned> TailPolicyOpIdx;
2421   const RISCVInstrInfo &TII = *Subtarget->getInstrInfo();
2422   const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode());
2423 
2424   bool IsTA = true;
2425   if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
2426     // The last operand of the pseudo is the policy op, but we might have a
2427     // Glue operand last. We might also have a chain.
2428     TailPolicyOpIdx = N->getNumOperands() - 1;
2429     if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue)
2430       (*TailPolicyOpIdx)--;
2431     if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other)
2432       (*TailPolicyOpIdx)--;
2433 
2434     if (!(N->getConstantOperandVal(*TailPolicyOpIdx) &
2435           RISCVII::TAIL_AGNOSTIC)) {
2436       // Keep the true-masked instruction when there is no unmasked TU
2437       // instruction
2438       if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef())
2439         return false;
2440       // We can't use TA if the tie-operand is not IMPLICIT_DEF
2441       if (!N->getOperand(0).isUndef())
2442         IsTA = false;
2443     }
2444   }
2445 
2446   unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo;
2447 
2448   // Check that we're dropping the mask operand and any policy operand
2449   // when we transform to this unmasked pseudo. Additionally, if this insturtion
2450   // is tail agnostic, the unmasked instruction should not have a merge op.
2451   uint64_t TSFlags = TII.get(Opc).TSFlags;
2452   assert((IsTA != RISCVII::hasMergeOp(TSFlags)) &&
2453          RISCVII::hasDummyMaskOp(TSFlags) &&
2454          !RISCVII::hasVecPolicyOp(TSFlags) &&
2455          "Unexpected pseudo to transform to");
2456   (void)TSFlags;
2457 
2458   SmallVector<SDValue, 8> Ops;
2459   // Skip the merge operand at index 0 if IsTA
2460   for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) {
2461     // Skip the mask, the policy, and the Glue.
2462     SDValue Op = N->getOperand(I);
2463     if (I == MaskOpIdx || I == TailPolicyOpIdx ||
2464         Op.getValueType() == MVT::Glue)
2465       continue;
2466     Ops.push_back(Op);
2467   }
2468 
2469   // Transitively apply any node glued to our new node.
2470   if (auto *TGlued = Glued->getGluedNode())
2471     Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
2472 
2473   SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
2474   ReplaceUses(N, Result);
2475 
2476   return true;
2477 }
2478 
2479 // This pass converts a legalized DAG into a RISCV-specific DAG, ready
2480 // for instruction scheduling.
2481 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
2482                                        CodeGenOpt::Level OptLevel) {
2483   return new RISCVDAGToDAGISel(TM, OptLevel);
2484 }
2485