1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/MemoryLocation.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineJumpTableInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
30 #include "llvm/CodeGen/ValueTypes.h"
31 #include "llvm/IR/DiagnosticInfo.h"
32 #include "llvm/IR/DiagnosticPrinter.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/IntrinsicsRISCV.h"
35 #include "llvm/IR/PatternMatch.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include "llvm/Support/KnownBits.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/raw_ostream.h"
41 
using namespace llvm;

// Tag used by LLVM_DEBUG / -debug-only= to filter this file's debug output.
#define DEBUG_TYPE "riscv-lower"

// Counter surfaced by -stats; incremented when a call is lowered as a tail
// call (incremented elsewhere in this file).
STATISTIC(NumTailCalls, "Number of tail calls");
47 
48 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
49                                          const RISCVSubtarget &STI)
50     : TargetLowering(TM), Subtarget(STI) {
51 
52   if (Subtarget.isRV32E())
53     report_fatal_error("Codegen not yet implemented for RV32E");
54 
55   RISCVABI::ABI ABI = Subtarget.getTargetABI();
56   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
57 
58   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
59       !Subtarget.hasStdExtF()) {
60     errs() << "Hard-float 'f' ABI can't be used for a target that "
61                 "doesn't support the F instruction set extension (ignoring "
62                           "target-abi)\n";
63     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
64   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
65              !Subtarget.hasStdExtD()) {
66     errs() << "Hard-float 'd' ABI can't be used for a target that "
67               "doesn't support the D instruction set extension (ignoring "
68               "target-abi)\n";
69     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
70   }
71 
72   switch (ABI) {
73   default:
74     report_fatal_error("Don't know how to lower this ABI");
75   case RISCVABI::ABI_ILP32:
76   case RISCVABI::ABI_ILP32F:
77   case RISCVABI::ABI_ILP32D:
78   case RISCVABI::ABI_LP64:
79   case RISCVABI::ABI_LP64F:
80   case RISCVABI::ABI_LP64D:
81     break;
82   }
83 
84   MVT XLenVT = Subtarget.getXLenVT();
85 
86   // Set up the register classes.
87   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
88 
89   if (Subtarget.hasStdExtZfh())
90     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
91   if (Subtarget.hasStdExtF())
92     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
93   if (Subtarget.hasStdExtD())
94     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
95 
96   static const MVT::SimpleValueType BoolVecVTs[] = {
97       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
98       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
99   static const MVT::SimpleValueType IntVecVTs[] = {
100       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
101       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
102       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
103       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
104       MVT::nxv4i64, MVT::nxv8i64};
105   static const MVT::SimpleValueType F16VecVTs[] = {
106       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
107       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
108   static const MVT::SimpleValueType F32VecVTs[] = {
109       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
110   static const MVT::SimpleValueType F64VecVTs[] = {
111       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
112 
113   if (Subtarget.hasVInstructions()) {
114     auto addRegClassForRVV = [this](MVT VT) {
115       unsigned Size = VT.getSizeInBits().getKnownMinValue();
116       assert(Size <= 512 && isPowerOf2_32(Size));
117       const TargetRegisterClass *RC;
118       if (Size <= 64)
119         RC = &RISCV::VRRegClass;
120       else if (Size == 128)
121         RC = &RISCV::VRM2RegClass;
122       else if (Size == 256)
123         RC = &RISCV::VRM4RegClass;
124       else
125         RC = &RISCV::VRM8RegClass;
126 
127       addRegisterClass(VT, RC);
128     };
129 
130     for (MVT VT : BoolVecVTs)
131       addRegClassForRVV(VT);
132     for (MVT VT : IntVecVTs) {
133       if (VT.getVectorElementType() == MVT::i64 &&
134           !Subtarget.hasVInstructionsI64())
135         continue;
136       addRegClassForRVV(VT);
137     }
138 
139     if (Subtarget.hasVInstructionsF16())
140       for (MVT VT : F16VecVTs)
141         addRegClassForRVV(VT);
142 
143     if (Subtarget.hasVInstructionsF32())
144       for (MVT VT : F32VecVTs)
145         addRegClassForRVV(VT);
146 
147     if (Subtarget.hasVInstructionsF64())
148       for (MVT VT : F64VecVTs)
149         addRegClassForRVV(VT);
150 
151     if (Subtarget.useRVVForFixedLengthVectors()) {
152       auto addRegClassForFixedVectors = [this](MVT VT) {
153         MVT ContainerVT = getContainerForFixedLengthVector(VT);
154         unsigned RCID = getRegClassIDForVecVT(ContainerVT);
155         const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
156         addRegisterClass(VT, TRI.getRegClass(RCID));
157       };
158       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
159         if (useRVVForFixedLengthVectorVT(VT))
160           addRegClassForFixedVectors(VT);
161 
162       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
163         if (useRVVForFixedLengthVectorVT(VT))
164           addRegClassForFixedVectors(VT);
165     }
166   }
167 
168   // Compute derived properties from the register classes.
169   computeRegisterProperties(STI.getRegisterInfo());
170 
171   setStackPointerRegisterToSaveRestore(RISCV::X2);
172 
173   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
174                    MVT::i1, Promote);
175 
176   // TODO: add all necessary setOperationAction calls.
177   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
178 
179   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
180   setOperationAction(ISD::BR_CC, XLenVT, Expand);
181   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
182   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
183 
184   setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
185 
186   setOperationAction(ISD::VASTART, MVT::Other, Custom);
187   setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
188 
189   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
190   if (!Subtarget.hasStdExtZbb())
191     setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
192 
193   if (Subtarget.is64Bit()) {
194     setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
195                        MVT::i32, Custom);
196 
197     setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
198                        MVT::i32, Custom);
199   } else {
200     setLibcallName(
201         {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
202         nullptr);
203     setLibcallName(RTLIB::MULO_I64, nullptr);
204   }
205 
206   if (!Subtarget.hasStdExtM()) {
207     setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU, ISD::SDIV, ISD::UDIV,
208                         ISD::SREM, ISD::UREM},
209                        XLenVT, Expand);
210   } else {
211     if (Subtarget.is64Bit()) {
212       setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
213 
214       setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
215                          {MVT::i8, MVT::i16, MVT::i32}, Custom);
216     } else {
217       setOperationAction(ISD::MUL, MVT::i64, Custom);
218     }
219   }
220 
221   setOperationAction(
222       {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
223       Expand);
224 
225   setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
226                      Custom);
227 
228   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
229       Subtarget.hasStdExtZbkb()) {
230     if (Subtarget.is64Bit())
231       setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
232   } else {
233     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
234   }
235 
236   if (Subtarget.hasStdExtZbp()) {
237     // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
238     // more combining.
239     setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, XLenVT, Custom);
240 
241     // BSWAP i8 doesn't exist.
242     setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
243 
244     setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, MVT::i16, Custom);
245 
246     if (Subtarget.is64Bit())
247       setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, MVT::i32, Custom);
248   } else {
249     // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
250     // pattern match it directly in isel.
251     setOperationAction(ISD::BSWAP, XLenVT,
252                        (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
253                            ? Legal
254                            : Expand);
255     // Zbkb can use rev8+brev8 to implement bitreverse.
256     setOperationAction(ISD::BITREVERSE, XLenVT,
257                        Subtarget.hasStdExtZbkb() ? Custom : Expand);
258   }
259 
260   if (Subtarget.hasStdExtZbb()) {
261     setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
262                        Legal);
263 
264     if (Subtarget.is64Bit())
265       setOperationAction(
266           {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
267           MVT::i32, Custom);
268   } else {
269     setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand);
270 
271     if (Subtarget.is64Bit())
272       setOperationAction(ISD::ABS, MVT::i32, Custom);
273   }
274 
275   if (Subtarget.hasStdExtZbt()) {
276     setOperationAction({ISD::FSHL, ISD::FSHR}, XLenVT, Custom);
277     setOperationAction(ISD::SELECT, XLenVT, Legal);
278 
279     if (Subtarget.is64Bit())
280       setOperationAction({ISD::FSHL, ISD::FSHR}, MVT::i32, Custom);
281   } else {
282     setOperationAction(ISD::SELECT, XLenVT, Custom);
283   }
284 
285   static constexpr ISD::NodeType FPLegalNodeTypes[] = {
286       ISD::FMINNUM,        ISD::FMAXNUM,       ISD::LRINT,
287       ISD::LLRINT,         ISD::LROUND,        ISD::LLROUND,
288       ISD::STRICT_LRINT,   ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
289       ISD::STRICT_LLROUND, ISD::STRICT_FMA,    ISD::STRICT_FADD,
290       ISD::STRICT_FSUB,    ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
291       ISD::STRICT_FSQRT,   ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};
292 
293   static const ISD::CondCode FPCCToExpand[] = {
294       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
295       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
296       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
297 
298   static const ISD::NodeType FPOpToExpand[] = {
299       ISD::FSIN, ISD::FCOS,       ISD::FSINCOS,   ISD::FPOW,
300       ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
301 
302   if (Subtarget.hasStdExtZfh())
303     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
304 
305   if (Subtarget.hasStdExtZfh()) {
306     for (auto NT : FPLegalNodeTypes)
307       setOperationAction(NT, MVT::f16, Legal);
308     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
309     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
310     setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
311     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
312     setOperationAction(ISD::SELECT, MVT::f16, Custom);
313     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
314 
315     setOperationAction({ISD::FREM, ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT,
316                         ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN, ISD::FTRUNC,
317                         ISD::FPOW, ISD::FPOWI, ISD::FCOS, ISD::FSIN,
318                         ISD::FSINCOS, ISD::FEXP, ISD::FEXP2, ISD::FLOG,
319                         ISD::FLOG2, ISD::FLOG10},
320                        MVT::f16, Promote);
321 
322     // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
323     // complete support for all operations in LegalizeDAG.
324 
325     // We need to custom promote this.
326     if (Subtarget.is64Bit())
327       setOperationAction(ISD::FPOWI, MVT::i32, Custom);
328   }
329 
330   if (Subtarget.hasStdExtF()) {
331     for (auto NT : FPLegalNodeTypes)
332       setOperationAction(NT, MVT::f32, Legal);
333     setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
334     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
335     setOperationAction(ISD::SELECT, MVT::f32, Custom);
336     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
337     for (auto Op : FPOpToExpand)
338       setOperationAction(Op, MVT::f32, Expand);
339     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
340     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
341   }
342 
343   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
344     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
345 
346   if (Subtarget.hasStdExtD()) {
347     for (auto NT : FPLegalNodeTypes)
348       setOperationAction(NT, MVT::f64, Legal);
349     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
350     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
351     setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
352     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
353     setOperationAction(ISD::SELECT, MVT::f64, Custom);
354     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
355     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
356     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
357     for (auto Op : FPOpToExpand)
358       setOperationAction(Op, MVT::f64, Expand);
359     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
360     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
361   }
362 
363   if (Subtarget.is64Bit())
364     setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
365                         ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
366                        MVT::i32, Custom);
367 
368   if (Subtarget.hasStdExtF()) {
369     setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
370                        Custom);
371 
372     setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
373                         ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
374                        XLenVT, Legal);
375 
376     setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
377     setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
378   }
379 
380   setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
381                       ISD::JumpTable},
382                      XLenVT, Custom);
383 
384   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
385 
386   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
387   // Unfortunately this can't be determined just from the ISA naming string.
388   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
389                      Subtarget.is64Bit() ? Legal : Custom);
390 
391   setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
392   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
393   if (Subtarget.is64Bit())
394     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
395 
396   if (Subtarget.hasStdExtA()) {
397     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
398     setMinCmpXchgSizeInBits(32);
399   } else {
400     setMaxAtomicSizeInBitsSupported(0);
401   }
402 
403   setBooleanContents(ZeroOrOneBooleanContent);
404 
405   if (Subtarget.hasVInstructions()) {
406     setBooleanVectorContents(ZeroOrOneBooleanContent);
407 
408     setOperationAction(ISD::VSCALE, XLenVT, Custom);
409 
410     // RVV intrinsics may have illegal operands.
411     // We also need to custom legalize vmv.x.s.
412     setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
413                        {MVT::i8, MVT::i16}, Custom);
414     if (Subtarget.is64Bit())
415       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
416     else
417       setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
418                          MVT::i64, Custom);
419 
420     setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
421                        MVT::Other, Custom);
422 
423     static const unsigned IntegerVPOps[] = {
424         ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
425         ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
426         ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
427         ISD::VP_XOR,         ISD::VP_ASHR,        ISD::VP_LSHR,
428         ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
429         ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
430         ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
431         ISD::VP_MERGE,       ISD::VP_SELECT,      ISD::VP_FPTOSI,
432         ISD::VP_FPTOUI,      ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
433         ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE};
434 
435     static const unsigned FloatingPointVPOps[] = {
436         ISD::VP_FADD,        ISD::VP_FSUB,
437         ISD::VP_FMUL,        ISD::VP_FDIV,
438         ISD::VP_FNEG,        ISD::VP_FMA,
439         ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
440         ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX,
441         ISD::VP_MERGE,       ISD::VP_SELECT,
442         ISD::VP_SITOFP,      ISD::VP_UITOFP,
443         ISD::VP_SETCC,       ISD::VP_FP_ROUND,
444         ISD::VP_FP_EXTEND};
445 
446     if (!Subtarget.is64Bit()) {
447       // We must custom-lower certain vXi64 operations on RV32 due to the vector
448       // element type being illegal.
449       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
450                          MVT::i64, Custom);
451 
452       setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND,
453                           ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR,
454                           ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
455                           ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN},
456                          MVT::i64, Custom);
457 
458       setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
459                           ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
460                           ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
461                           ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
462                          MVT::i64, Custom);
463     }
464 
465     for (MVT VT : BoolVecVTs) {
466       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
467 
468       // Mask VTs are custom-expanded into a series of standard nodes
469       setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
470                           ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
471                          VT, Custom);
472 
473       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
474                          Custom);
475 
476       setOperationAction(ISD::SELECT, VT, Custom);
477       setOperationAction(
478           {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
479           Expand);
480 
481       setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
482 
483       setOperationAction(
484           {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
485           Custom);
486 
487       setOperationAction(
488           {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
489           Custom);
490 
491       // RVV has native int->float & float->int conversions where the
492       // element type sizes are within one power-of-two of each other. Any
493       // wider distances between type sizes have to be lowered as sequences
494       // which progressively narrow the gap in stages.
495       setOperationAction(
496           {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
497           VT, Custom);
498 
499       // Expand all extending loads to types larger than this, and truncating
500       // stores from types larger than this.
501       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
502         setTruncStoreAction(OtherVT, VT, Expand);
503         setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
504                          VT, Expand);
505       }
506 
507       setOperationAction(
508           {ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_TRUNCATE, ISD::VP_SETCC}, VT,
509           Custom);
510     }
511 
512     for (MVT VT : IntVecVTs) {
513       if (VT.getVectorElementType() == MVT::i64 &&
514           !Subtarget.hasVInstructionsI64())
515         continue;
516 
517       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
518       setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
519 
520       // Vectors implement MULHS/MULHU.
521       setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
522 
523       // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
524       if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
525         setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);
526 
527       setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
528                          Legal);
529 
530       setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
531 
532       setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP, ISD::BSWAP}, VT,
533                          Expand);
534 
535       setOperationAction(ISD::BSWAP, VT, Expand);
536 
537       // Custom-lower extensions and truncations from/to mask types.
538       setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
539                          VT, Custom);
540 
541       // RVV has native int->float & float->int conversions where the
542       // element type sizes are within one power-of-two of each other. Any
543       // wider distances between type sizes have to be lowered as sequences
544       // which progressively narrow the gap in stages.
545       setOperationAction(
546           {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
547           VT, Custom);
548 
549       setOperationAction(
550           {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
551 
552       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
553       // nodes which truncate by one power of two at a time.
554       setOperationAction(ISD::TRUNCATE, VT, Custom);
555 
556       // Custom-lower insert/extract operations to simplify patterns.
557       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
558                          Custom);
559 
560       // Custom-lower reduction operations to set up the corresponding custom
561       // nodes' operands.
562       setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND,
563                           ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR,
564                           ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
565                           ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN},
566                          VT, Custom);
567 
568       setOperationAction(IntegerVPOps, VT, Custom);
569 
570       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
571 
572       setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
573                          VT, Custom);
574 
575       setOperationAction(
576           {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
577           Custom);
578 
579       setOperationAction(
580           {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
581           VT, Custom);
582 
583       setOperationAction(ISD::SELECT, VT, Custom);
584       setOperationAction(ISD::SELECT_CC, VT, Expand);
585 
586       setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);
587 
588       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
589         setTruncStoreAction(VT, OtherVT, Expand);
590         setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
591                          VT, Expand);
592       }
593 
594       // Splice
595       setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
596 
597       // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
598       // type that can represent the value exactly.
599       if (VT.getVectorElementType() != MVT::i64) {
600         MVT FloatEltVT =
601             VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
602         EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
603         if (isTypeLegal(FloatVT)) {
604           setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
605                              Custom);
606         }
607       }
608     }
609 
610     // Expand various CCs to best match the RVV ISA, which natively supports UNE
611     // but no other unordered comparisons, and supports all ordered comparisons
612     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
613     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
614     // and we pattern-match those back to the "original", swapping operands once
615     // more. This way we catch both operations and both "vf" and "fv" forms with
616     // fewer patterns.
617     static const ISD::CondCode VFPCCToExpand[] = {
618         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
619         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
620         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
621     };
622 
623     // Sets common operation actions on RVV floating-point vector types.
624     const auto SetCommonVFPActions = [&](MVT VT) {
625       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
626       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
627       // sizes are within one power-of-two of each other. Therefore conversions
628       // between vXf16 and vXf64 must be lowered as sequences which convert via
629       // vXf32.
630       setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
631       // Custom-lower insert/extract operations to simplify patterns.
632       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
633                          Custom);
634       // Expand various condition codes (explained above).
635       setCondCodeAction(VFPCCToExpand, VT, Expand);
636 
637       setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
638 
639       setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND},
640                          VT, Custom);
641 
642       setOperationAction({ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD,
643                           ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAX},
644                          VT, Custom);
645 
646       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
647 
648       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
649 
650       setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
651                          VT, Custom);
652 
653       setOperationAction(
654           {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
655           Custom);
656 
657       setOperationAction(ISD::SELECT, VT, Custom);
658       setOperationAction(ISD::SELECT_CC, VT, Expand);
659 
660       setOperationAction(
661           {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
662           VT, Custom);
663 
664       setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);
665 
666       setOperationAction(FloatingPointVPOps, VT, Custom);
667     };
668 
669     // Sets common extload/truncstore actions on RVV floating-point vector
670     // types.
671     const auto SetCommonVFPExtLoadTruncStoreActions =
672         [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
673           for (auto SmallVT : SmallerVTs) {
674             setTruncStoreAction(VT, SmallVT, Expand);
675             setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
676           }
677         };
678 
679     if (Subtarget.hasVInstructionsF16())
680       for (MVT VT : F16VecVTs)
681         SetCommonVFPActions(VT);
682 
683     for (MVT VT : F32VecVTs) {
684       if (Subtarget.hasVInstructionsF32())
685         SetCommonVFPActions(VT);
686       SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
687     }
688 
689     for (MVT VT : F64VecVTs) {
690       if (Subtarget.hasVInstructionsF64())
691         SetCommonVFPActions(VT);
692       SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
693       SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
694     }
695 
696     if (Subtarget.useRVVForFixedLengthVectors()) {
697       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
698         if (!useRVVForFixedLengthVectorVT(VT))
699           continue;
700 
701         // By default everything must be expanded.
702         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
703           setOperationAction(Op, VT, Expand);
704         for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
705           setTruncStoreAction(VT, OtherVT, Expand);
706           setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD},
707                            OtherVT, VT, Expand);
708         }
709 
710         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
711         setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
712                            Custom);
713 
714         setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
715                            Custom);
716 
717         setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
718                            VT, Custom);
719 
720         setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
721 
722         setOperationAction(ISD::SETCC, VT, Custom);
723 
724         setOperationAction(ISD::SELECT, VT, Custom);
725 
726         setOperationAction(ISD::TRUNCATE, VT, Custom);
727 
728         setOperationAction(ISD::BITCAST, VT, Custom);
729 
730         setOperationAction(
731             {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
732             Custom);
733 
734         setOperationAction(
735             {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
736             Custom);
737 
738         setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
739                             ISD::FP_TO_UINT},
740                            VT, Custom);
741 
        // Operations below are different for masks and for other vector types.
743         if (VT.getVectorElementType() == MVT::i1) {
744           setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
745                               ISD::OR, ISD::XOR},
746                              VT, Custom);
747 
748           setOperationAction(
749               {ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_SETCC, ISD::VP_TRUNCATE},
750               VT, Custom);
751           continue;
752         }
753 
754         // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
755         // it before type legalization for i64 vectors on RV32. It will then be
756         // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
757         // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
758         // improvements first.
759         if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
760           setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
761           setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
762         }
763 
764         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
765         setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
766 
767         setOperationAction(
768             {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
769 
770         setOperationAction(
771             {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
772             Custom);
773 
774         setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
775                             ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
776                             ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
777                            VT, Custom);
778 
779         setOperationAction(
780             {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
781 
782         // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
783         if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
784           setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
785 
786         setOperationAction(
787             {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
788             Custom);
789 
790         setOperationAction(ISD::VSELECT, VT, Custom);
791         setOperationAction(ISD::SELECT_CC, VT, Expand);
792 
793         setOperationAction(
794             {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);
795 
796         // Custom-lower reduction operations to set up the corresponding custom
797         // nodes' operands.
798         setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
799                             ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
800                             ISD::VECREDUCE_UMIN},
801                            VT, Custom);
802 
803         setOperationAction(IntegerVPOps, VT, Custom);
804 
805         // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
806         // type that can represent the value exactly.
807         if (VT.getVectorElementType() != MVT::i64) {
808           MVT FloatEltVT =
809               VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
810           EVT FloatVT =
811               MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
812           if (isTypeLegal(FloatVT))
813             setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
814                                Custom);
815         }
816       }
817 
818       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
819         if (!useRVVForFixedLengthVectorVT(VT))
820           continue;
821 
822         // By default everything must be expanded.
823         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
824           setOperationAction(Op, VT, Expand);
825         for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
826           setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
827           setTruncStoreAction(VT, OtherVT, Expand);
828         }
829 
830         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
831         setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
832                            Custom);
833 
834         setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
835                             ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
836                             ISD::EXTRACT_VECTOR_ELT},
837                            VT, Custom);
838 
839         setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
840                             ISD::MGATHER, ISD::MSCATTER},
841                            VT, Custom);
842 
843         setOperationAction(
844             {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
845             Custom);
846 
847         setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
848                             ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
849                             ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM},
850                            VT, Custom);
851 
852         setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
853 
854         setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND},
855                            VT, Custom);
856 
857         for (auto CC : VFPCCToExpand)
858           setCondCodeAction(CC, VT, Expand);
859 
860         setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
861         setOperationAction(ISD::SELECT_CC, VT, Expand);
862 
863         setOperationAction(ISD::BITCAST, VT, Custom);
864 
865         setOperationAction({ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD,
866                             ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAX},
867                            VT, Custom);
868 
869         setOperationAction(FloatingPointVPOps, VT, Custom);
870       }
871 
872       // Custom-legalize bitcasts from fixed-length vectors to scalar types.
873       setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
874                          Custom);
875       if (Subtarget.hasStdExtZfh())
876         setOperationAction(ISD::BITCAST, MVT::f16, Custom);
877       if (Subtarget.hasStdExtF())
878         setOperationAction(ISD::BITCAST, MVT::f32, Custom);
879       if (Subtarget.hasStdExtD())
880         setOperationAction(ISD::BITCAST, MVT::f64, Custom);
881     }
882   }
883 
884   // Function alignments.
885   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
886   setMinFunctionAlignment(FunctionAlignment);
887   setPrefFunctionAlignment(FunctionAlignment);
888 
889   setMinimumJumpTableEntries(5);
890 
891   // Jumps are expensive, compared to logic
892   setJumpIsExpensive();
893 
894   setTargetDAGCombine({ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
895                        ISD::OR, ISD::XOR});
896 
897   if (Subtarget.hasStdExtF())
898     setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});
899 
900   if (Subtarget.hasStdExtZbp())
901     setTargetDAGCombine({ISD::ROTL, ISD::ROTR});
902 
903   if (Subtarget.hasStdExtZbb())
904     setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
905 
906   if (Subtarget.hasStdExtZbkb())
907     setTargetDAGCombine(ISD::BITREVERSE);
908   if (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZbb())
909     setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
910   if (Subtarget.hasStdExtF())
911     setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
912                          ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
913   if (Subtarget.hasVInstructions())
914     setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
915                          ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
916                          ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR});
917 
918   setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
919   setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
920 }
921 
922 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
923                                             LLVMContext &Context,
924                                             EVT VT) const {
925   if (!VT.isVector())
926     return getPointerTy(DL);
927   if (Subtarget.hasVInstructions() &&
928       (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
929     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
930   return VT.changeVectorElementTypeToInteger();
931 }
932 
933 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
934   return Subtarget.getXLenVT();
935 }
936 
// Describe the memory access performed by a RISCV memory intrinsic so the
// backend can attach a MachineMemOperand to it. Returns true and fills in
// \p Info when \p Intrinsic touches memory; returns false otherwise.
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  auto &DL = I.getModule()->getDataLayout();
  switch (Intrinsic) {
  default:
    return false;
  // The masked atomic helpers all read-modify-write a naturally aligned i32
  // whose address is the intrinsic's first argument.
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // MOVolatile keeps the atomic access from being reordered or elided.
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  case Intrinsic::riscv_masked_strided_load:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    // Pointer is argument 1; memVT/align describe a single scalar element.
    // The total footprint is unknown because the stride is arbitrary.
    Info.ptrVal = I.getArgOperand(1);
    Info.memVT = getValueType(DL, I.getType()->getScalarType());
    Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
    Info.size = MemoryLocation::UnknownSize;
    Info.flags |= MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::riscv_masked_strided_store:
    Info.opc = ISD::INTRINSIC_VOID;
    // The stored vector is argument 0 and the pointer is argument 1; describe
    // one element of the stored value.
    Info.ptrVal = I.getArgOperand(1);
    Info.memVT =
        getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
    Info.align = Align(
        DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
        8);
    Info.size = MemoryLocation::UnknownSize;
    Info.flags |= MachineMemOperand::MOStore;
    return true;
  // Segment loads return a struct of vectors; describe one scalar element of
  // the first struct field. The total size depends on VL, hence unknown.
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = I.getArgOperand(0);
    Info.memVT =
        getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
    Info.align =
        Align(DL.getTypeSizeInBits(
                  I.getType()->getStructElementType(0)->getScalarType()) /
              8);
    Info.size = MemoryLocation::UnknownSize;
    Info.flags |= MachineMemOperand::MOLoad;
    return true;
  }
}
1001 
1002 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1003                                                 const AddrMode &AM, Type *Ty,
1004                                                 unsigned AS,
1005                                                 Instruction *I) const {
1006   // No global is ever allowed as a base.
1007   if (AM.BaseGV)
1008     return false;
1009 
1010   // RVV instructions only support register addressing.
1011   if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1012     return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1013 
1014   // Require a 12-bit signed offset.
1015   if (!isInt<12>(AM.BaseOffs))
1016     return false;
1017 
1018   switch (AM.Scale) {
1019   case 0: // "r+i" or just "i", depending on HasBaseReg.
1020     break;
1021   case 1:
1022     if (!AM.HasBaseReg) // allow "r+i".
1023       break;
1024     return false; // disallow "r+r" or "r+r+i".
1025   default:
1026     return false;
1027   }
1028 
1029   return true;
1030 }
1031 
1032 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1033   return isInt<12>(Imm);
1034 }
1035 
1036 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1037   return isInt<12>(Imm);
1038 }
1039 
1040 // On RV32, 64-bit integers are split into their high and low parts and held
1041 // in two different registers, so the trunc is free since the low register can
1042 // just be used.
1043 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1044   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1045     return false;
1046   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1047   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1048   return (SrcBits == 64 && DestBits == 32);
1049 }
1050 
1051 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1052   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
1053       !SrcVT.isInteger() || !DstVT.isInteger())
1054     return false;
1055   unsigned SrcBits = SrcVT.getSizeInBits();
1056   unsigned DestBits = DstVT.getSizeInBits();
1057   return (SrcBits == 64 && DestBits == 32);
1058 }
1059 
1060 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1061   // Zexts are free if they can be combined with a load.
1062   // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1063   // poorly with type legalization of compares preferring sext.
1064   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1065     EVT MemVT = LD->getMemoryVT();
1066     if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1067         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1068          LD->getExtensionType() == ISD::ZEXTLOAD))
1069       return true;
1070   }
1071 
1072   return TargetLowering::isZExtFree(Val, VT2);
1073 }
1074 
1075 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1076   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1077 }
1078 
1079 bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
1080   return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1081 }
1082 
1083 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
1084   return Subtarget.hasStdExtZbb();
1085 }
1086 
1087 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
1088   return Subtarget.hasStdExtZbb();
1089 }
1090 
1091 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1092   EVT VT = Y.getValueType();
1093 
1094   // FIXME: Support vectors once we have tests.
1095   if (VT.isVector())
1096     return false;
1097 
1098   return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
1099           Subtarget.hasStdExtZbkb()) &&
1100          !isa<ConstantSDNode>(Y);
1101 }
1102 
1103 bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
1104   // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1105   auto *C = dyn_cast<ConstantSDNode>(Y);
1106   return C && C->getAPIntValue().ule(10);
1107 }
1108 
1109 bool RISCVTargetLowering::
1110     shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1111         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1112         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1113         SelectionDAG &DAG) const {
1114   // One interesting pattern that we'd want to form is 'bit extract':
1115   //   ((1 >> Y) & 1) ==/!= 0
1116   // But we also need to be careful not to try to reverse that fold.
1117 
1118   // Is this '((1 >> Y) & 1)'?
1119   if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1120     return false; // Keep the 'bit extract' pattern.
1121 
1122   // Will this be '((1 >> Y) & 1)' after the transform?
1123   if (NewShiftOpcode == ISD::SRL && CC->isOne())
1124     return true; // Do form the 'bit extract' pattern.
1125 
1126   // If 'X' is a constant, and we transform, then we will immediately
1127   // try to undo the fold, thus causing endless combine loop.
1128   // So only do the transform if X is not a constant. This matches the default
1129   // implementation of this function.
1130   return !XC;
1131 }
1132 
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// splats of scalars can fold into vector instructions.
///
/// On success, the uses worth sinking (each splat shuffle together with its
/// insertelement feeder) are appended to \p Ops.
bool RISCVTargetLowering::shouldSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  // Only vector instructions benefit, and only when RVV is available.
  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
    return false;

  // Returns true if operand \p Operand of \p I could fold into a vector
  // instruction taking a scalar operand (a .vx/.vf form).
  auto IsSinker = [&](Instruction *I, int Operand) {
    switch (I->getOpcode()) {
    // These ops accept a splat on either side.
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::Mul:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FAdd:
    case Instruction::FSub:
    case Instruction::FMul:
    case Instruction::FDiv:
    case Instruction::ICmp:
    case Instruction::FCmp:
      return true;
    // Shifts and division/remainder only have scalar forms for the RHS.
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      return Operand == 1;
    case Instruction::Call:
      if (auto *II = dyn_cast<IntrinsicInst>(I)) {
        switch (II->getIntrinsicID()) {
        case Intrinsic::fma:
        case Intrinsic::vp_fma:
          return Operand == 0 || Operand == 1;
        // FIXME: Our patterns can only match vx/vf instructions when the splat
        // is on the RHS, because TableGen doesn't recognize our VP operations
        // as commutative.
        case Intrinsic::vp_add:
        case Intrinsic::vp_mul:
        case Intrinsic::vp_and:
        case Intrinsic::vp_or:
        case Intrinsic::vp_xor:
        case Intrinsic::vp_fadd:
        case Intrinsic::vp_fmul:
        case Intrinsic::vp_shl:
        case Intrinsic::vp_lshr:
        case Intrinsic::vp_ashr:
        case Intrinsic::vp_udiv:
        case Intrinsic::vp_sdiv:
        case Intrinsic::vp_urem:
        case Intrinsic::vp_srem:
          return Operand == 1;
        // ... with the exception of vp.sub/vp.fsub/vp.fdiv, which have
        // explicit patterns for both LHS and RHS (as 'vr' versions).
        case Intrinsic::vp_sub:
        case Intrinsic::vp_fsub:
        case Intrinsic::vp_fdiv:
          return Operand == 0 || Operand == 1;
        default:
          return false;
        }
      }
      return false;
    default:
      return false;
    }
  };

  for (auto OpIdx : enumerate(I->operands())) {
    if (!IsSinker(I, OpIdx.index()))
      continue;

    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
    // Make sure we are not already sinking this operand
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
      continue;

    // We are looking for a splat that can be sunk.
    if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
                             m_Undef(), m_ZeroMask())))
      continue;

    // All uses of the shuffle should be sunk to avoid duplicating it across gpr
    // and vector registers
    for (Use &U : Op->uses()) {
      Instruction *Insn = cast<Instruction>(U.getUser());
      if (!IsSinker(Insn, U.getOperandNo()))
        return false;
    }

    // Sink both the insertelement feeding the shuffle (the shuffle's operand
    // 0, per the match above) and the use of the shuffle itself.
    Ops.push_back(&Op->getOperandUse(0));
    Ops.push_back(&OpIdx.value());
  }
  return true;
}
1233 
1234 bool RISCVTargetLowering::isOffsetFoldingLegal(
1235     const GlobalAddressSDNode *GA) const {
1236   // In order to maximise the opportunity for common subexpression elimination,
1237   // keep a separate ADD node for the global address offset instead of folding
1238   // it in the global address node. Later peephole optimisations may choose to
1239   // fold it back in when profitable.
1240   return false;
1241 }
1242 
1243 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
1244                                        bool ForCodeSize) const {
1245   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1246   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
1247     return false;
1248   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
1249     return false;
1250   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
1251     return false;
1252   return Imm.isZero();
1253 }
1254 
1255 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
1256   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
1257          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
1258          (VT == MVT::f64 && Subtarget.hasStdExtD());
1259 }
1260 
1261 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1262                                                       CallingConv::ID CC,
1263                                                       EVT VT) const {
1264   // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1265   // We might still end up using a GPR but that will be decided based on ABI.
1266   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1267   if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1268     return MVT::f32;
1269 
1270   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1271 }
1272 
1273 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1274                                                            CallingConv::ID CC,
1275                                                            EVT VT) const {
1276   // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1277   // We might still end up using a GPR but that will be decided based on ABI.
1278   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1279   if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1280     return 1;
1281 
1282   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1283 }
1284 
1285 // Changes the condition code and swaps operands if necessary, so the SetCC
1286 // operation matches one of the comparisons supported directly by branches
1287 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1288 // with 1/-1.
1289 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1290                                     ISD::CondCode &CC, SelectionDAG &DAG) {
1291   // Convert X > -1 to X >= 0.
1292   if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
1293     RHS = DAG.getConstant(0, DL, RHS.getValueType());
1294     CC = ISD::SETGE;
1295     return;
1296   }
1297   // Convert X < 1 to 0 >= X.
1298   if (CC == ISD::SETLT && isOneConstant(RHS)) {
1299     RHS = LHS;
1300     LHS = DAG.getConstant(0, DL, RHS.getValueType());
1301     CC = ISD::SETGE;
1302     return;
1303   }
1304 
1305   switch (CC) {
1306   default:
1307     break;
1308   case ISD::SETGT:
1309   case ISD::SETLE:
1310   case ISD::SETUGT:
1311   case ISD::SETULE:
1312     CC = ISD::getSetCCSwappedOperands(CC);
1313     std::swap(LHS, RHS);
1314     break;
1315   }
1316 }
1317 
1318 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
1319   assert(VT.isScalableVector() && "Expecting a scalable vector type");
1320   unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
1321   if (VT.getVectorElementType() == MVT::i1)
1322     KnownSize *= 8;
1323 
1324   switch (KnownSize) {
1325   default:
1326     llvm_unreachable("Invalid LMUL.");
1327   case 8:
1328     return RISCVII::VLMUL::LMUL_F8;
1329   case 16:
1330     return RISCVII::VLMUL::LMUL_F4;
1331   case 32:
1332     return RISCVII::VLMUL::LMUL_F2;
1333   case 64:
1334     return RISCVII::VLMUL::LMUL_1;
1335   case 128:
1336     return RISCVII::VLMUL::LMUL_2;
1337   case 256:
1338     return RISCVII::VLMUL::LMUL_4;
1339   case 512:
1340     return RISCVII::VLMUL::LMUL_8;
1341   }
1342 }
1343 
1344 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
1345   switch (LMul) {
1346   default:
1347     llvm_unreachable("Invalid LMUL.");
1348   case RISCVII::VLMUL::LMUL_F8:
1349   case RISCVII::VLMUL::LMUL_F4:
1350   case RISCVII::VLMUL::LMUL_F2:
1351   case RISCVII::VLMUL::LMUL_1:
1352     return RISCV::VRRegClassID;
1353   case RISCVII::VLMUL::LMUL_2:
1354     return RISCV::VRM2RegClassID;
1355   case RISCVII::VLMUL::LMUL_4:
1356     return RISCV::VRM4RegClassID;
1357   case RISCVII::VLMUL::LMUL_8:
1358     return RISCV::VRM8RegClassID;
1359   }
1360 }
1361 
1362 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
1363   RISCVII::VLMUL LMUL = getLMUL(VT);
1364   if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
1365       LMUL == RISCVII::VLMUL::LMUL_F4 ||
1366       LMUL == RISCVII::VLMUL::LMUL_F2 ||
1367       LMUL == RISCVII::VLMUL::LMUL_1) {
1368     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1369                   "Unexpected subreg numbering");
1370     return RISCV::sub_vrm1_0 + Index;
1371   }
1372   if (LMUL == RISCVII::VLMUL::LMUL_2) {
1373     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1374                   "Unexpected subreg numbering");
1375     return RISCV::sub_vrm2_0 + Index;
1376   }
1377   if (LMUL == RISCVII::VLMUL::LMUL_4) {
1378     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1379                   "Unexpected subreg numbering");
1380     return RISCV::sub_vrm4_0 + Index;
1381   }
1382   llvm_unreachable("Invalid vector type.");
1383 }
1384 
1385 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
1386   if (VT.getVectorElementType() == MVT::i1)
1387     return RISCV::VRRegClassID;
1388   return getRegClassIDForLMUL(getLMUL(VT));
1389 }
1390 
// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  // The descent loop below relies on these class IDs increasing with LMUL.
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we half
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      // Halve the vector, decide whether the element index falls into the
      // high or low half, and compose the matching subregister step.
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      // Indices into the high half are rebased relative to that half.
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}
1426 
1427 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
1428 // stores for those types.
1429 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1430   return !Subtarget.useRVVForFixedLengthVectors() ||
1431          (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
1432 }
1433 
1434 bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const {
1435   if (ScalarTy->isPointerTy())
1436     return true;
1437 
1438   if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
1439       ScalarTy->isIntegerTy(32))
1440     return true;
1441 
1442   if (ScalarTy->isIntegerTy(64))
1443     return Subtarget.hasVInstructionsI64();
1444 
1445   if (ScalarTy->isHalfTy())
1446     return Subtarget.hasVInstructionsF16();
1447   if (ScalarTy->isFloatTy())
1448     return Subtarget.hasVInstructionsF32();
1449   if (ScalarTy->isDoubleTy())
1450     return Subtarget.hasVInstructionsF64();
1451 
1452   return false;
1453 }
1454 
1455 static SDValue getVLOperand(SDValue Op) {
1456   assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1457           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
1458          "Unexpected opcode");
1459   bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
1460   unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
1461   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1462       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
1463   if (!II)
1464     return SDValue();
1465   return Op.getOperand(II->VLOperand + 1 + HasChain);
1466 }
1467 
// Return true if the fixed-length vector type VT should be lowered with RVV
// instructions on this subtarget: the element type must be supported and the
// whole type must fit the configured VLEN/LMUL budget.
static bool useRVVForFixedLengthVectorVT(MVT VT,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  // We only support a set of vector types with a consistent maximum fixed size
  // across all supported vector element types to avoid legalization issues.
  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
  // fixed-length vector type we support is 1024 bytes.
  if (VT.getFixedSizeInBits() > 1024 * 8)
    return false;

  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();

  MVT EltVT = VT.getVectorElementType();

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (EltVT.SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > MinVLen)
      return false;
    // Scale so the LMUL computation below treats a 1-bit element like the
    // byte-per-element case (mirrors the i1 scaling in getLMUL).
    MinVLen /= 8;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    break;
  // Wider integer and FP elements additionally need the matching feature.
  case MVT::i64:
    if (!Subtarget.hasVInstructionsI64())
      return false;
    break;
  case MVT::f16:
    if (!Subtarget.hasVInstructionsF16())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasVInstructionsF32())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasVInstructionsF64())
      return false;
    break;
  }

  // Reject elements larger than ELEN.
  if (EltVT.getSizeInBits() > Subtarget.getELEN())
    return false;

  // Number of vector registers (LMUL) this type would occupy.
  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
  if (!VT.isPow2VectorType())
    return false;

  return true;
}
1534 
// Public entry point: forwards to the file-local query of the same name using
// this lowering's subtarget.
bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
}
1538 
// Return the largest legal scalable vector type that matches VT's element
// type, i.e. the scalable "container" in which a fixed-length vector VT is
// operated on.
static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  // This may be called before legal types are setup.
  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
          useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
         "Expected legal fixed length vector!");

  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
  unsigned MaxELen = Subtarget.getELEN();

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
    // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
    // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
    // Scale the element count to "per-vscale" units, assuming
    // vscale == MinVLen / RVVBitsPerBlock.
    unsigned NumElts =
        (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
    // Clamp to the smallest supported fractional LMUL's element count.
    NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
    assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
    return MVT::getScalableVectorVT(EltVT, NumElts);
  }
  }
}
1573 
// Convenience overload that looks up the TargetLowering from the DAG.
static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
                                          Subtarget);
}
1579 
// Public entry point: pick the scalable container type for VT using this
// lowering object and its subtarget.
MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
}
1583 
1584 // Grow V to consume an entire RVV register.
1585 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1586                                        const RISCVSubtarget &Subtarget) {
1587   assert(VT.isScalableVector() &&
1588          "Expected to convert into a scalable vector!");
1589   assert(V.getValueType().isFixedLengthVector() &&
1590          "Expected a fixed length vector operand!");
1591   SDLoc DL(V);
1592   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1593   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1594 }
1595 
1596 // Shrink V so it's just big enough to maintain a VT's worth of data.
1597 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1598                                          const RISCVSubtarget &Subtarget) {
1599   assert(VT.isFixedLengthVector() &&
1600          "Expected to convert into a fixed length vector!");
1601   assert(V.getValueType().isScalableVector() &&
1602          "Expected a scalable vector operand!");
1603   SDLoc DL(V);
1604   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1605   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1606 }
1607 
1608 /// Return the type of the mask type suitable for masking the provided
1609 /// vector type.  This is simply an i1 element type vector of the same
1610 /// (possibly scalable) length.
1611 static MVT getMaskTypeFor(EVT VecVT) {
1612   assert(VecVT.isVector());
1613   ElementCount EC = VecVT.getVectorElementCount();
1614   return MVT::getVectorVT(MVT::i1, EC);
1615 }
1616 
1617 /// Creates an all ones mask suitable for masking a vector of type VecTy with
1618 /// vector length VL.  .
1619 static SDValue getAllOnesMask(MVT VecVT, SDValue VL, SDLoc DL,
1620                               SelectionDAG &DAG) {
1621   MVT MaskVT = getMaskTypeFor(VecVT);
1622   return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1623 }
1624 
1625 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1626 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1627 // the vector type that it is contained in.
1628 static std::pair<SDValue, SDValue>
1629 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1630                 const RISCVSubtarget &Subtarget) {
1631   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1632   MVT XLenVT = Subtarget.getXLenVT();
1633   SDValue VL = VecVT.isFixedLengthVector()
1634                    ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1635                    : DAG.getRegister(RISCV::X0, XLenVT);
1636   SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
1637   return {Mask, VL};
1638 }
1639 
// As above but assuming the given type is a scalable vector type. The type is
// its own container, so it is passed for both VecVT and ContainerVT.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
                        const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}
1647 
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  // Unconditionally decline, regardless of type or number of defined values.
  return false;
}
1660 
// Lower ISD::FP_TO_SINT_SAT / ISD::FP_TO_UINT_SAT to RISCV-specific
// conversion nodes plus a NaN fixup, or return an empty SDValue for
// unsupported saturation widths.
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
  // RISCV FP-to-int conversions saturate to the destination register size, but
  // don't produce 0 for nan. We can use a conversion instruction and fix the
  // nan case with a compare and a select.
  SDValue Src = Op.getOperand(0);

  EVT DstVT = Op.getValueType();
  // Saturation width requested by the node (operand 1 is a VT node).
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
  unsigned Opc;
  if (SatVT == DstVT)
    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
    // i32 saturation into an i64 result uses the RV64 W-form nodes.
    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
  else
    return SDValue();
  // FIXME: Support other SatVTs by clamping before or after the conversion.

  SDLoc DL(Op);
  // Convert with round-towards-zero, the rounding mode of fp-to-int casts.
  SDValue FpToInt = DAG.getNode(
      Opc, DL, DstVT, Src,
      DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));

  // FP_TO_*INT_SAT requires NaN inputs to produce 0: select 0 whenever Src is
  // unordered with itself (i.e. is NaN).
  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
}
1689 
// Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
// and back. Taking care to avoid converting values that are nan or already
// correct.
// TODO: Floor and ceil could be shorter by changing rounding mode, but we don't
// have FRM dependencies modeled yet.
static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Unexpected type");

  SDLoc DL(Op);

  // Freeze the source since we are increasing the number of uses.
  SDValue Src = DAG.getFreeze(Op.getOperand(0));

  // Truncate to integer and convert back to FP. The round trip discards any
  // fractional bits, yielding the trunc() result.
  MVT IntVT = VT.changeVectorElementTypeToInteger();
  SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src);
  Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);

  // i1 vector type used for the compare results below.
  MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());

  if (Op.getOpcode() == ISD::FCEIL) {
    // If the truncated value is greater than or equal to the original
    // value, we've computed the ceil. Otherwise, we went the wrong way and
    // need to increase by 1.
    // FIXME: This should use a masked operation. Handle here or in isel?
    SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated,
                                 DAG.getConstantFP(1.0, DL, VT));
    SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT);
    Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
  } else if (Op.getOpcode() == ISD::FFLOOR) {
    // If the truncated value is less than or equal to the original value,
    // we've computed the floor. Otherwise, we went the wrong way and need to
    // decrease by 1.
    // FIXME: This should use a masked operation. Handle here or in isel?
    SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated,
                                 DAG.getConstantFP(1.0, DL, VT));
    SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT);
    Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
  }

  // Restore the original sign so that -0.0 is preserved.
  Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);

  // Determine the largest integer that can be represented exactly. This and
  // values larger than it don't have any fractional bits so don't need to
  // be converted. (2^(precision-1), i.e. one in the topmost mantissa bit.)
  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
  unsigned Precision = APFloat::semanticsPrecision(FltSem);
  APFloat MaxVal = APFloat(FltSem);
  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
                          /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
  SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);

  // If abs(Src) was larger than MaxVal or nan, keep it. (The SETOLT compare
  // is false for NaN, so NaN lanes select the original Src.)
  SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
  SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
  return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
}
1749 
// ISD::FROUND is defined to round to nearest with ties rounding away from 0.
// This mode isn't supported in vector hardware on RISCV. But as long as we
// aren't compiling with trapping math, we can emulate this with
// floor(X + copysign(nextafter(0.5, 0.0), X)).
// FIXME: Could be shorter by changing rounding mode, but we don't have FRM
// dependencies modeled yet.
// FIXME: Use masked operations to avoid final merge.
static SDValue lowerFROUND(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Unexpected type");

  SDLoc DL(Op);

  // Freeze the source since we are increasing the number of uses.
  SDValue Src = DAG.getFreeze(Op.getOperand(0));

  // We do the conversion on the absolute value and fix the sign at the end.
  SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);

  // Build nextafter(0.5, 0.0) in VT's semantics: start from 0.5, convert to
  // the target format, then step one ULP towards zero. Using the value just
  // below 0.5 avoids rounding exact x.5-epsilon inputs the wrong way.
  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
  bool Ignored;
  APFloat Point5Pred = APFloat(0.5f);
  Point5Pred.convert(FltSem, APFloat::rmNearestTiesToEven, &Ignored);
  Point5Pred.next(/*nextDown*/ true);

  // Add the adjustment.
  SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Abs,
                               DAG.getConstantFP(Point5Pred, DL, VT));

  // Truncate to integer and convert back to fp.
  MVT IntVT = VT.changeVectorElementTypeToInteger();
  SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Adjust);
  Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);

  // Restore the original sign.
  Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);

  // Determine the largest integer that can be represented exactly. This and
  // values larger than it don't have any fractional bits so don't need to
  // be converted. (2^(precision-1), i.e. one in the topmost mantissa bit.)
  unsigned Precision = APFloat::semanticsPrecision(FltSem);
  APFloat MaxVal = APFloat(FltSem);
  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
                          /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
  SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);

  // If abs(Src) was larger than MaxVal or nan, keep it. (The SETOLT compare
  // is false for NaN, so NaN lanes select the original Src.)
  MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
  SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
  return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
}
1801 
// Describes an arithmetic index sequence X, X+S, X+2*S, ... where the step
// S = StepNumerator / StepDenominator and X = Addend (the value at index 0).
struct VIDSequence {
  // Numerator of the per-element step; may be negative, never zero.
  int64_t StepNumerator;
  // Positive denominator of the per-element step.
  unsigned StepDenominator;
  // Value of the sequence at index 0.
  int64_t Addend;
};
1807 
// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
// to the (non-zero) step S and start value X. This can be then lowered as the
// RVV sequence (VID * S) + X, for example.
// The step S is represented as an integer numerator divided by a positive
// denominator. Note that the implementation currently only identifies
// sequences in which either the numerator is +/- 1 or the denominator is 1. It
// cannot detect 2/3, for example.
// Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
// determine whether this is worth generating code for.
static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
  unsigned NumElts = Op.getNumOperands();
  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
  if (!Op.getValueType().isInteger())
    return None;

  Optional<unsigned> SeqStepDenom;
  Optional<int64_t> SeqStepNum, SeqAddend;
  // Last non-undef (value, index) pair seen, used to compute deltas.
  Optional<std::pair<uint64_t, unsigned>> PrevElt;
  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
  // First pass: derive a consistent step (numerator/denominator) from the
  // deltas between consecutive non-undef elements.
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    // Assume undef elements match the sequence; we just have to be careful
    // when interpolating across them.
    if (Op.getOperand(Idx).isUndef())
      continue;
    // The BUILD_VECTOR must be all constants.
    if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
      return None;

    // Truncate the constant to the vector's element width.
    uint64_t Val = Op.getConstantOperandVal(Idx) &
                   maskTrailingOnes<uint64_t>(EltSizeInBits);

    if (PrevElt) {
      // Calculate the step since the last non-undef element, and ensure
      // it's consistent across the entire sequence.
      unsigned IdxDiff = Idx - PrevElt->second;
      int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);

      // A zero-value value difference means that we're somewhere in the middle
      // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
      // step change before evaluating the sequence.
      if (ValDiff == 0)
        continue;

      int64_t Remainder = ValDiff % IdxDiff;
      // Normalize the step if it's greater than 1.
      if (Remainder != ValDiff) {
        // The difference must cleanly divide the element span.
        if (Remainder != 0)
          return None;
        ValDiff /= IdxDiff;
        IdxDiff = 1;
      }

      if (!SeqStepNum)
        SeqStepNum = ValDiff;
      else if (ValDiff != SeqStepNum)
        return None;

      if (!SeqStepDenom)
        SeqStepDenom = IdxDiff;
      else if (IdxDiff != *SeqStepDenom)
        return None;
    }

    // Record this non-undef element for later.
    if (!PrevElt || PrevElt->first != Val)
      PrevElt = std::make_pair(Val, Idx);
  }

  // We need to have logged a step for this to count as a legal index sequence.
  if (!SeqStepNum || !SeqStepDenom)
    return None;

  // Loop back through the sequence and validate elements we might have skipped
  // while waiting for a valid step. While doing this, log any sequence addend.
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    if (Op.getOperand(Idx).isUndef())
      continue;
    uint64_t Val = Op.getConstantOperandVal(Idx) &
                   maskTrailingOnes<uint64_t>(EltSizeInBits);
    // What the sequence predicts at this index with a zero addend.
    uint64_t ExpectedVal =
        (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
    // Each element must disagree with the prediction by the same addend.
    int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
    if (!SeqAddend)
      SeqAddend = Addend;
    else if (Addend != SeqAddend)
      return None;
  }

  assert(SeqAddend && "Must have an addend if we have a step");

  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
}
1902 
// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
// and lower it as a VRGATHER_VX_VL from the source vector. Returns an empty
// SDValue when the pattern doesn't apply.
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
                                  SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
  if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();
  SDValue Vec = SplatVal.getOperand(0);
  // Only perform this optimization on vectors of the same size for simplicity.
  if (Vec.getValueType() != VT)
    return SDValue();
  SDValue Idx = SplatVal.getOperand(1);
  // The index must be a legal type.
  if (Idx.getValueType() != Subtarget.getXLenVT())
    return SDValue();

  // RVV ops operate on the scalable container type; wrap a fixed-length
  // source and unwrap the result below.
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // vrgather.vx broadcasts element Idx of Vec across the result.
  SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
                               Idx, Mask, VL);

  if (!VT.isFixedLengthVector())
    return Gather;

  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
1936 
1937 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
1938                                  const RISCVSubtarget &Subtarget) {
1939   MVT VT = Op.getSimpleValueType();
1940   assert(VT.isFixedLengthVector() && "Unexpected vector!");
1941 
1942   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1943 
1944   SDLoc DL(Op);
1945   SDValue Mask, VL;
1946   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1947 
1948   MVT XLenVT = Subtarget.getXLenVT();
1949   unsigned NumElts = Op.getNumOperands();
1950 
1951   if (VT.getVectorElementType() == MVT::i1) {
1952     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
1953       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
1954       return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
1955     }
1956 
1957     if (ISD::isBuildVectorAllOnes(Op.getNode())) {
1958       SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
1959       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
1960     }
1961 
1962     // Lower constant mask BUILD_VECTORs via an integer vector type, in
1963     // scalar integer chunks whose bit-width depends on the number of mask
1964     // bits and XLEN.
1965     // First, determine the most appropriate scalar integer type to use. This
1966     // is at most XLenVT, but may be shrunk to a smaller vector element type
1967     // according to the size of the final vector - use i8 chunks rather than
1968     // XLenVT if we're producing a v8i1. This results in more consistent
1969     // codegen across RV32 and RV64.
1970     unsigned NumViaIntegerBits =
1971         std::min(std::max(NumElts, 8u), Subtarget.getXLen());
1972     NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
1973     if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
1974       // If we have to use more than one INSERT_VECTOR_ELT then this
1975       // optimization is likely to increase code size; avoid peforming it in
1976       // such a case. We can use a load from a constant pool in this case.
1977       if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
1978         return SDValue();
1979       // Now we can create our integer vector type. Note that it may be larger
1980       // than the resulting mask type: v4i1 would use v1i8 as its integer type.
1981       MVT IntegerViaVecVT =
1982           MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
1983                            divideCeil(NumElts, NumViaIntegerBits));
1984 
1985       uint64_t Bits = 0;
1986       unsigned BitPos = 0, IntegerEltIdx = 0;
1987       SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
1988 
1989       for (unsigned I = 0; I < NumElts; I++, BitPos++) {
1990         // Once we accumulate enough bits to fill our scalar type, insert into
1991         // our vector and clear our accumulated data.
1992         if (I != 0 && I % NumViaIntegerBits == 0) {
1993           if (NumViaIntegerBits <= 32)
1994             Bits = SignExtend64<32>(Bits);
1995           SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1996           Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
1997                             Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1998           Bits = 0;
1999           BitPos = 0;
2000           IntegerEltIdx++;
2001         }
2002         SDValue V = Op.getOperand(I);
2003         bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
2004         Bits |= ((uint64_t)BitValue << BitPos);
2005       }
2006 
2007       // Insert the (remaining) scalar value into position in our integer
2008       // vector type.
2009       if (NumViaIntegerBits <= 32)
2010         Bits = SignExtend64<32>(Bits);
2011       SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2012       Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
2013                         DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2014 
2015       if (NumElts < NumViaIntegerBits) {
2016         // If we're producing a smaller vector than our minimum legal integer
2017         // type, bitcast to the equivalent (known-legal) mask type, and extract
2018         // our final mask.
2019         assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
2020         Vec = DAG.getBitcast(MVT::v8i1, Vec);
2021         Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
2022                           DAG.getConstant(0, DL, XLenVT));
2023       } else {
2024         // Else we must have produced an integer type with the same size as the
2025         // mask type; bitcast for the final result.
2026         assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
2027         Vec = DAG.getBitcast(VT, Vec);
2028       }
2029 
2030       return Vec;
2031     }
2032 
2033     // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
2034     // vector type, we have a legal equivalently-sized i8 type, so we can use
2035     // that.
2036     MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
2037     SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
2038 
2039     SDValue WideVec;
2040     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2041       // For a splat, perform a scalar truncate before creating the wider
2042       // vector.
2043       assert(Splat.getValueType() == XLenVT &&
2044              "Unexpected type for i1 splat value");
2045       Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
2046                           DAG.getConstant(1, DL, XLenVT));
2047       WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
2048     } else {
2049       SmallVector<SDValue, 8> Ops(Op->op_values());
2050       WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
2051       SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
2052       WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
2053     }
2054 
2055     return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
2056   }
2057 
2058   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2059     if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
2060       return Gather;
2061     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
2062                                         : RISCVISD::VMV_V_X_VL;
2063     Splat =
2064         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
2065     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2066   }
2067 
2068   // Try and match index sequences, which we can lower to the vid instruction
2069   // with optional modifications. An all-undef vector is matched by
2070   // getSplatValue, above.
2071   if (auto SimpleVID = isSimpleVIDSequence(Op)) {
2072     int64_t StepNumerator = SimpleVID->StepNumerator;
2073     unsigned StepDenominator = SimpleVID->StepDenominator;
2074     int64_t Addend = SimpleVID->Addend;
2075 
2076     assert(StepNumerator != 0 && "Invalid step");
2077     bool Negate = false;
2078     int64_t SplatStepVal = StepNumerator;
2079     unsigned StepOpcode = ISD::MUL;
2080     if (StepNumerator != 1) {
2081       if (isPowerOf2_64(std::abs(StepNumerator))) {
2082         Negate = StepNumerator < 0;
2083         StepOpcode = ISD::SHL;
2084         SplatStepVal = Log2_64(std::abs(StepNumerator));
2085       }
2086     }
2087 
2088     // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
2089     // threshold since it's the immediate value many RVV instructions accept.
2090     // There is no vmul.vi instruction so ensure multiply constant can fit in
2091     // a single addi instruction.
2092     if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
2093          (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
2094         isPowerOf2_32(StepDenominator) &&
2095         (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
2096       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
2097       // Convert right out of the scalable type so we can use standard ISD
2098       // nodes for the rest of the computation. If we used scalable types with
2099       // these, we'd lose the fixed-length vector info and generate worse
2100       // vsetvli code.
2101       VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
2102       if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
2103           (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
2104         SDValue SplatStep = DAG.getSplatBuildVector(
2105             VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
2106         VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
2107       }
2108       if (StepDenominator != 1) {
2109         SDValue SplatStep = DAG.getSplatBuildVector(
2110             VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
2111         VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
2112       }
2113       if (Addend != 0 || Negate) {
2114         SDValue SplatAddend = DAG.getSplatBuildVector(
2115             VT, DL, DAG.getConstant(Addend, DL, XLenVT));
2116         VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
2117       }
2118       return VID;
2119     }
2120   }
2121 
2122   // Attempt to detect "hidden" splats, which only reveal themselves as splats
2123   // when re-interpreted as a vector with a larger element type. For example,
2124   //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
2125   // could be instead splat as
2126   //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
2127   // TODO: This optimization could also work on non-constant splats, but it
2128   // would require bit-manipulation instructions to construct the splat value.
2129   SmallVector<SDValue> Sequence;
2130   unsigned EltBitSize = VT.getScalarSizeInBits();
2131   const auto *BV = cast<BuildVectorSDNode>(Op);
2132   if (VT.isInteger() && EltBitSize < 64 &&
2133       ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
2134       BV->getRepeatedSequence(Sequence) &&
2135       (Sequence.size() * EltBitSize) <= 64) {
2136     unsigned SeqLen = Sequence.size();
2137     MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
2138     MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
2139     assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
2140             ViaIntVT == MVT::i64) &&
2141            "Unexpected sequence type");
2142 
2143     unsigned EltIdx = 0;
2144     uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
2145     uint64_t SplatValue = 0;
2146     // Construct the amalgamated value which can be splatted as this larger
2147     // vector type.
2148     for (const auto &SeqV : Sequence) {
2149       if (!SeqV.isUndef())
2150         SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
2151                        << (EltIdx * EltBitSize));
2152       EltIdx++;
2153     }
2154 
2155     // On RV64, sign-extend from 32 to 64 bits where possible in order to
2156     // achieve better constant materializion.
2157     if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
2158       SplatValue = SignExtend64<32>(SplatValue);
2159 
2160     // Since we can't introduce illegal i64 types at this stage, we can only
2161     // perform an i64 splat on RV32 if it is its own sign-extended value. That
2162     // way we can use RVV instructions to splat.
2163     assert((ViaIntVT.bitsLE(XLenVT) ||
2164             (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
2165            "Unexpected bitcast sequence");
2166     if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
2167       SDValue ViaVL =
2168           DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
2169       MVT ViaContainerVT =
2170           getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
2171       SDValue Splat =
2172           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
2173                       DAG.getUNDEF(ViaContainerVT),
2174                       DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
2175       Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
2176       return DAG.getBitcast(VT, Splat);
2177     }
2178   }
2179 
2180   // Try and optimize BUILD_VECTORs with "dominant values" - these are values
2181   // which constitute a large proportion of the elements. In such cases we can
2182   // splat a vector with the dominant element and make up the shortfall with
2183   // INSERT_VECTOR_ELTs.
2184   // Note that this includes vectors of 2 elements by association. The
2185   // upper-most element is the "dominant" one, allowing us to use a splat to
2186   // "insert" the upper element, and an insert of the lower element at position
2187   // 0, which improves codegen.
2188   SDValue DominantValue;
2189   unsigned MostCommonCount = 0;
2190   DenseMap<SDValue, unsigned> ValueCounts;
2191   unsigned NumUndefElts =
2192       count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
2193 
2194   // Track the number of scalar loads we know we'd be inserting, estimated as
2195   // any non-zero floating-point constant. Other kinds of element are either
2196   // already in registers or are materialized on demand. The threshold at which
2197   // a vector load is more desirable than several scalar materializion and
2198   // vector-insertion instructions is not known.
2199   unsigned NumScalarLoads = 0;
2200 
2201   for (SDValue V : Op->op_values()) {
2202     if (V.isUndef())
2203       continue;
2204 
2205     ValueCounts.insert(std::make_pair(V, 0));
2206     unsigned &Count = ValueCounts[V];
2207 
2208     if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
2209       NumScalarLoads += !CFP->isExactlyValue(+0.0);
2210 
2211     // Is this value dominant? In case of a tie, prefer the highest element as
2212     // it's cheaper to insert near the beginning of a vector than it is at the
2213     // end.
2214     if (++Count >= MostCommonCount) {
2215       DominantValue = V;
2216       MostCommonCount = Count;
2217     }
2218   }
2219 
2220   assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
2221   unsigned NumDefElts = NumElts - NumUndefElts;
2222   unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
2223 
2224   // Don't perform this optimization when optimizing for size, since
2225   // materializing elements and inserting them tends to cause code bloat.
2226   if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
2227       ((MostCommonCount > DominantValueCountThreshold) ||
2228        (ValueCounts.size() <= Log2_32(NumDefElts)))) {
2229     // Start by splatting the most common element.
2230     SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
2231 
2232     DenseSet<SDValue> Processed{DominantValue};
2233     MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
2234     for (const auto &OpIdx : enumerate(Op->ops())) {
2235       const SDValue &V = OpIdx.value();
2236       if (V.isUndef() || !Processed.insert(V).second)
2237         continue;
2238       if (ValueCounts[V] == 1) {
2239         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
2240                           DAG.getConstant(OpIdx.index(), DL, XLenVT));
2241       } else {
2242         // Blend in all instances of this value using a VSELECT, using a
2243         // mask where each bit signals whether that element is the one
2244         // we're after.
2245         SmallVector<SDValue> Ops;
2246         transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
2247           return DAG.getConstant(V == V1, DL, XLenVT);
2248         });
2249         Vec = DAG.getNode(ISD::VSELECT, DL, VT,
2250                           DAG.getBuildVector(SelMaskTy, DL, Ops),
2251                           DAG.getSplatBuildVector(VT, DL, V), Vec);
2252       }
2253     }
2254 
2255     return Vec;
2256   }
2257 
2258   return SDValue();
2259 }
2260 
2261 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
2262                                    SDValue Lo, SDValue Hi, SDValue VL,
2263                                    SelectionDAG &DAG) {
2264   if (!Passthru)
2265     Passthru = DAG.getUNDEF(VT);
2266   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
2267     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
2268     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
2269     // If Hi constant is all the same sign bit as Lo, lower this as a custom
2270     // node in order to try and match RVV vector/scalar instructions.
2271     if ((LoC >> 31) == HiC)
2272       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
2273 
2274     // If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use
2275     // vmv.v.x whose EEW = 32 to lower it.
2276     auto *Const = dyn_cast<ConstantSDNode>(VL);
2277     if (LoC == HiC && Const && Const->isAllOnesValue()) {
2278       MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
2279       // TODO: if vl <= min(VLMAX), we can also do this. But we could not
2280       // access the subtarget here now.
2281       auto InterVec = DAG.getNode(
2282           RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
2283                                   DAG.getRegister(RISCV::X0, MVT::i32));
2284       return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
2285     }
2286   }
2287 
2288   // Fall back to a stack store and stride x0 vector load.
2289   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
2290                      Hi, VL);
2291 }
2292 
2293 // Called by type legalization to handle splat of i64 on RV32.
2294 // FIXME: We can optimize this when the type has sign or zero bits in one
2295 // of the halves.
2296 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
2297                                    SDValue Scalar, SDValue VL,
2298                                    SelectionDAG &DAG) {
2299   assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
2300   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2301                            DAG.getConstant(0, DL, MVT::i32));
2302   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2303                            DAG.getConstant(1, DL, MVT::i32));
2304   return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
2305 }
2306 
2307 // This function lowers a splat of a scalar operand Splat with the vector
2308 // length VL. It ensures the final sequence is type legal, which is useful when
2309 // lowering a splat after type legalization.
2310 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
2311                                 MVT VT, SDLoc DL, SelectionDAG &DAG,
2312                                 const RISCVSubtarget &Subtarget) {
2313   bool HasPassthru = Passthru && !Passthru.isUndef();
2314   if (!HasPassthru && !Passthru)
2315     Passthru = DAG.getUNDEF(VT);
2316   if (VT.isFloatingPoint()) {
2317     // If VL is 1, we could use vfmv.s.f.
2318     if (isOneConstant(VL))
2319       return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
2320     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
2321   }
2322 
2323   MVT XLenVT = Subtarget.getXLenVT();
2324 
2325   // Simplest case is that the operand needs to be promoted to XLenVT.
2326   if (Scalar.getValueType().bitsLE(XLenVT)) {
2327     // If the operand is a constant, sign extend to increase our chances
2328     // of being able to use a .vi instruction. ANY_EXTEND would become a
2329     // a zero extend and the simm5 check in isel would fail.
2330     // FIXME: Should we ignore the upper bits in isel instead?
2331     unsigned ExtOpc =
2332         isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2333     Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2334     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
2335     // If VL is 1 and the scalar value won't benefit from immediate, we could
2336     // use vmv.s.x.
2337     if (isOneConstant(VL) &&
2338         (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
2339       return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
2340     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
2341   }
2342 
2343   assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
2344          "Unexpected scalar for splat lowering!");
2345 
2346   if (isOneConstant(VL) && isNullConstant(Scalar))
2347     return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
2348                        DAG.getConstant(0, DL, XLenVT), VL);
2349 
2350   // Otherwise use the more complicated splatting algorithm.
2351   return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
2352 }
2353 
2354 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
2355                                 const RISCVSubtarget &Subtarget) {
2356   // We need to be able to widen elements to the next larger integer type.
2357   if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
2358     return false;
2359 
2360   int Size = Mask.size();
2361   assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
2362 
2363   int Srcs[] = {-1, -1};
2364   for (int i = 0; i != Size; ++i) {
2365     // Ignore undef elements.
2366     if (Mask[i] < 0)
2367       continue;
2368 
2369     // Is this an even or odd element.
2370     int Pol = i % 2;
2371 
2372     // Ensure we consistently use the same source for this element polarity.
2373     int Src = Mask[i] / Size;
2374     if (Srcs[Pol] < 0)
2375       Srcs[Pol] = Src;
2376     if (Srcs[Pol] != Src)
2377       return false;
2378 
2379     // Make sure the element within the source is appropriate for this element
2380     // in the destination.
2381     int Elt = Mask[i] % Size;
2382     if (Elt != i / 2)
2383       return false;
2384   }
2385 
2386   // We need to find a source for each polarity and they can't be the same.
2387   if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
2388     return false;
2389 
2390   // Swap the sources if the second source was in the even polarity.
2391   SwapSources = Srcs[0] > Srcs[1];
2392 
2393   return true;
2394 }
2395 
2396 /// Match shuffles that concatenate two vectors, rotate the concatenation,
2397 /// and then extract the original number of elements from the rotated result.
2398 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The
2399 /// returned rotation amount is for a rotate right, where elements move from
2400 /// higher elements to lower elements. \p LoSrc indicates the first source
2401 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
2402 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
2403 /// 0 or 1 if a rotation is found.
2404 ///
2405 /// NOTE: We talk about rotate to the right which matches how bit shift and
2406 /// rotate instructions are described where LSBs are on the right, but LLVM IR
2407 /// and the table below write vectors with the lowest elements on the left.
2408 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
2409   int Size = Mask.size();
2410 
2411   // We need to detect various ways of spelling a rotation:
2412   //   [11, 12, 13, 14, 15,  0,  1,  2]
2413   //   [-1, 12, 13, 14, -1, -1,  1, -1]
2414   //   [-1, -1, -1, -1, -1, -1,  1,  2]
2415   //   [ 3,  4,  5,  6,  7,  8,  9, 10]
2416   //   [-1,  4,  5,  6, -1, -1,  9, -1]
2417   //   [-1,  4,  5,  6, -1, -1, -1, -1]
2418   int Rotation = 0;
2419   LoSrc = -1;
2420   HiSrc = -1;
2421   for (int i = 0; i != Size; ++i) {
2422     int M = Mask[i];
2423     if (M < 0)
2424       continue;
2425 
2426     // Determine where a rotate vector would have started.
2427     int StartIdx = i - (M % Size);
2428     // The identity rotation isn't interesting, stop.
2429     if (StartIdx == 0)
2430       return -1;
2431 
2432     // If we found the tail of a vector the rotation must be the missing
2433     // front. If we found the head of a vector, it must be how much of the
2434     // head.
2435     int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
2436 
2437     if (Rotation == 0)
2438       Rotation = CandidateRotation;
2439     else if (Rotation != CandidateRotation)
2440       // The rotations don't match, so we can't match this mask.
2441       return -1;
2442 
2443     // Compute which value this mask is pointing at.
2444     int MaskSrc = M < Size ? 0 : 1;
2445 
2446     // Compute which of the two target values this index should be assigned to.
2447     // This reflects whether the high elements are remaining or the low elemnts
2448     // are remaining.
2449     int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
2450 
2451     // Either set up this value if we've not encountered it before, or check
2452     // that it remains consistent.
2453     if (TargetSrc < 0)
2454       TargetSrc = MaskSrc;
2455     else if (TargetSrc != MaskSrc)
2456       // This may be a rotation, but it pulls from the inputs in some
2457       // unsupported interleaving.
2458       return -1;
2459   }
2460 
2461   // Check that we successfully analyzed the mask, and normalize the results.
2462   assert(Rotation != 0 && "Failed to locate a viable rotation!");
2463   assert((LoSrc >= 0 || HiSrc >= 0) &&
2464          "Failed to find a rotated input vector!");
2465 
2466   return Rotation;
2467 }
2468 
// Lower a fixed-length VECTOR_SHUFFLE. Strategies are tried roughly in order
// of expected cost: a splat (possibly folded into a strided or scalar load),
// a slide-based element rotation, a widening-arithmetic interleave, a plain
// VSELECT, and finally one or two vrgathers blended with a vselect. Returns
// SDValue() only for i8 vectors with more than 256 elements, whose gather
// would need an index type wider than i8 (see FIXME below).
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  unsigned NumElts = VT.getVectorNumElements();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDValue TrueMask, VL;
  std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // Splat shuffles: every result element is lane `Lane` of V1.
  if (SVN->isSplat()) {
    const int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT SVT = VT.getVectorElementType();

      // Turn splatted vector load into a strided load with an X0 stride.
      SDValue V = V1;
      // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
      // with undef.
      // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
      int Offset = Lane;
      if (V.getOpcode() == ISD::CONCAT_VECTORS) {
        int OpElements =
            V.getOperand(0).getSimpleValueType().getVectorNumElements();
        V = V.getOperand(Offset / OpElements);
        Offset %= OpElements;
      }

      // We need to ensure the load isn't atomic or volatile.
      if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
        auto *Ld = cast<LoadSDNode>(V);
        // Address of the single splatted element within the load.
        Offset *= SVT.getStoreSize();
        SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
                                                   TypeSize::Fixed(Offset), DL);

        // If this is SEW=64 on RV32, use a strided load with a stride of x0.
        if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
          SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
          SDValue IntID =
              DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
          SDValue Ops[] = {Ld->getChain(),
                           IntID,
                           DAG.getUNDEF(ContainerVT),
                           NewAddr,
                           DAG.getRegister(RISCV::X0, XLenVT),
                           VL};
          SDValue NewLoad = DAG.getMemIntrinsicNode(
              ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
              DAG.getMachineFunction().getMachineMemOperand(
                  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
          // Keep the chain consistent with the original load's ordering.
          DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
          return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
        }

        // Otherwise use a scalar load and splat. This will give the best
        // opportunity to fold a splat into the operation. ISel can turn it into
        // the x0 strided load if we aren't able to fold away the select.
        if (SVT.isFloatingPoint())
          V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                          Ld->getPointerInfo().getWithOffset(Offset),
                          Ld->getOriginalAlign(),
                          Ld->getMemOperand()->getFlags());
        else
          V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
                             Ld->getPointerInfo().getWithOffset(Offset), SVT,
                             Ld->getOriginalAlign(),
                             Ld->getMemOperand()->getFlags());
        DAG.makeEquivalentMemoryOrdering(Ld, V);

        unsigned Opc =
            VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
        SDValue Splat =
            DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
        return convertFromScalableVector(VT, Splat, DAG, Subtarget);
      }

      // Not a foldable load: broadcast the lane with vrgather.vx.
      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)NumElts && "Unexpected lane!");
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  ArrayRef<int> Mask = SVN->getMask();

  // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
  // be undef which can be handled with a single SLIDEDOWN/UP.
  int LoSrc, HiSrc;
  int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
  if (Rotation > 0) {
    SDValue LoV, HiV;
    if (LoSrc >= 0) {
      LoV = LoSrc == 0 ? V1 : V2;
      LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
    }
    if (HiSrc >= 0) {
      HiV = HiSrc == 0 ? V1 : V2;
      HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
    }

    // We found a rotation. We need to slide HiV down by Rotation. Then we need
    // to slide LoV up by (NumElts - Rotation).
    unsigned InvRotate = NumElts - Rotation;

    SDValue Res = DAG.getUNDEF(ContainerVT);
    if (HiV) {
      // If we are doing a SLIDEDOWN+SLIDEUP, reduce the VL for the SLIDEDOWN.
      // FIXME: If we are only doing a SLIDEDOWN, don't reduce the VL as it
      // causes multiple vsetvlis in some test cases such as lowering
      // reduce.mul
      SDValue DownVL = VL;
      if (LoV)
        DownVL = DAG.getConstant(InvRotate, DL, XLenVT);
      Res =
          DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, Res, HiV,
                      DAG.getConstant(Rotation, DL, XLenVT), TrueMask, DownVL);
    }
    if (LoV)
      Res = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Res, LoV,
                        DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL);

    return convertFromScalableVector(VT, Res, DAG, Subtarget);
  }

  // Detect an interleave shuffle and lower to
  // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
  bool SwapSources;
  if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) {
    // Swap sources if needed.
    if (SwapSources)
      std::swap(V1, V2);

    // Extract the lower half of the vectors.
    MVT HalfVT = VT.getHalfNumVectorElementsVT();
    V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
                     DAG.getConstant(0, DL, XLenVT));
    V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
                     DAG.getConstant(0, DL, XLenVT));

    // Double the element width and halve the number of elements in an int type.
    unsigned EltBits = VT.getScalarSizeInBits();
    MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
    MVT WideIntVT =
        MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
    // Convert this to a scalable vector. We need to base this on the
    // destination size to ensure there's always a type with a smaller LMUL.
    MVT WideIntContainerVT =
        getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);

    // Convert sources to scalable vectors with the same element count as the
    // larger type.
    MVT HalfContainerVT = MVT::getVectorVT(
        VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
    V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget);
    V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget);

    // Cast sources to integer.
    MVT IntEltVT = MVT::getIntegerVT(EltBits);
    MVT IntHalfVT =
        MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
    V1 = DAG.getBitcast(IntHalfVT, V1);
    V2 = DAG.getBitcast(IntHalfVT, V2);

    // Freeze V2 since we use it twice and we need to be sure that the add and
    // multiply see the same value.
    V2 = DAG.getFreeze(V2);

    // Recreate TrueMask using the widened type's element count.
    TrueMask = getAllOnesMask(HalfContainerVT, VL, DL, DAG);

    // Widen V1 and V2 with 0s and add one copy of V2 to V1.
    SDValue Add = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1,
                              V2, TrueMask, VL);
    // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
    SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
                                     DAG.getUNDEF(IntHalfVT),
                                     DAG.getAllOnesConstant(DL, XLenVT));
    SDValue WidenMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT,
                                   V2, Multiplier, TrueMask, VL);
    // Add the new copies to our previous addition giving us 2^eltbits copies of
    // V2. This is equivalent to shifting V2 left by eltbits. This should
    // combine with the vwmulu.vv above to form vwmaccu.vv.
    Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
                      TrueMask, VL);
    // Cast back to ContainerVT. We need to re-create a new ContainerVT in case
    // WideIntContainerVT is a larger fractional LMUL than implied by the fixed
    // vector VT.
    ContainerVT =
        MVT::getVectorVT(VT.getVectorElementType(),
                         WideIntContainerVT.getVectorElementCount() * 2);
    Add = DAG.getBitcast(ContainerVT, Add);
    return convertFromScalableVector(VT, Add, DAG, Subtarget);
  }

  // Detect shuffles which can be re-expressed as vector selects; these are
  // shuffles in which each element in the destination is taken from an element
  // at the corresponding index in either source vectors.
  bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
    int MaskIndex = MaskIdx.value();
    return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
  });

  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");

  SmallVector<SDValue> MaskVals;
  // As a backup, shuffles can be lowered via a vrgather instruction, possibly
  // merged with a second vrgather.
  SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;

  // By default we preserve the original operand order, and use a mask to
  // select LHS as true and RHS as false. However, since RVV vector selects may
  // feature splats but only on the LHS, we may choose to invert our mask and
  // instead select between RHS and LHS.
  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
  bool InvertMask = IsSelect == SwapOps;

  // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
  // half.
  DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;

  // Now construct the mask that will be used by the vselect or blended
  // vrgather operation. For vrgathers, construct the appropriate indices into
  // each vector.
  for (int MaskIndex : Mask) {
    bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
    MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
    if (!IsSelect) {
      bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
      GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
                                     ? DAG.getConstant(MaskIndex, DL, XLenVT)
                                     : DAG.getUNDEF(XLenVT));
      GatherIndicesRHS.push_back(
          IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
                            : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
      if (IsLHSOrUndefIndex && MaskIndex >= 0)
        ++LHSIndexCounts[MaskIndex];
      if (!IsLHSOrUndefIndex)
        ++RHSIndexCounts[MaskIndex - NumElts];
    }
  }

  if (SwapOps) {
    std::swap(V1, V2);
    std::swap(GatherIndicesLHS, GatherIndicesRHS);
  }

  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);

  if (IsSelect)
    return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);

  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
    // On such a large vector we're unable to use i8 as the index type.
    // FIXME: We could promote the index to i16 and use vrgatherei16, but that
    // may involve vector splitting if we're already at LMUL=8, or our
    // user-supplied maximum fixed-length LMUL.
    return SDValue();
  }

  unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
  unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IndexVT = VT.changeTypeToInteger();
  // Since we can't introduce illegal index types at this stage, use i16 and
  // vrgatherei16 if the corresponding index type for plain vrgather is greater
  // than XLenVT.
  if (IndexVT.getScalarType().bitsGT(XLenVT)) {
    GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
    IndexVT = IndexVT.changeVectorElementType(MVT::i16);
  }

  MVT IndexContainerVT =
      ContainerVT.changeVectorElementType(IndexVT.getScalarType());

  SDValue Gather;
  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
    Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
                              Subtarget);
  } else {
    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
    // If only one index is used, we can use a "splat" vrgather.
    // TODO: We can splat the most-common index and fix-up any stragglers, if
    // that's beneficial.
    if (LHSIndexCounts.size() == 1) {
      int SplatIndex = LHSIndexCounts.begin()->getFirst();
      Gather =
          DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
                      DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
    } else {
      SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
      LHSIndices =
          convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);

      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
                           TrueMask, VL);
    }
  }

  // If a second vector operand is used by this shuffle, blend it in with an
  // additional vrgather.
  if (!V2.isUndef()) {
    V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
    // If only one index is used, we can use a "splat" vrgather.
    // TODO: We can splat the most-common index and fix-up any stragglers, if
    // that's beneficial.
    if (RHSIndexCounts.size() == 1) {
      int SplatIndex = RHSIndexCounts.begin()->getFirst();
      V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
                       DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
    } else {
      SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
      RHSIndices =
          convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
      V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
                       VL);
    }

    MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
    SelectMask =
        convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);

    Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
                         Gather, VL);
  }

  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
2806 
2807 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
2808   // Support splats for any type. These should type legalize well.
2809   if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
2810     return true;
2811 
2812   // Only support legal VTs for other shuffles for now.
2813   if (!isTypeLegal(VT))
2814     return false;
2815 
2816   MVT SVT = VT.getSimpleVT();
2817 
2818   bool SwapSources;
2819   int LoSrc, HiSrc;
2820   return (isElementRotate(LoSrc, HiSrc, M) > 0) ||
2821          isInterleaveShuffle(M, SVT, SwapSources, Subtarget);
2822 }
2823 
2824 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
2825 // the exponent.
2826 static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
2827   MVT VT = Op.getSimpleValueType();
2828   unsigned EltSize = VT.getScalarSizeInBits();
2829   SDValue Src = Op.getOperand(0);
2830   SDLoc DL(Op);
2831 
2832   // We need a FP type that can represent the value.
2833   // TODO: Use f16 for i8 when possible?
2834   MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32;
2835   MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
2836 
2837   // Legal types should have been checked in the RISCVTargetLowering
2838   // constructor.
2839   // TODO: Splitting may make sense in some cases.
2840   assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
2841          "Expected legal float type!");
2842 
2843   // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
2844   // The trailing zero count is equal to log2 of this single bit value.
2845   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
2846     SDValue Neg =
2847         DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
2848     Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
2849   }
2850 
2851   // We have a legal FP type, convert to it.
2852   SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
2853   // Bitcast to integer and shift the exponent to the LSB.
2854   EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
2855   SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
2856   unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
2857   SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
2858                               DAG.getConstant(ShiftAmt, DL, IntVT));
2859   // Truncate back to original type to allow vnsrl.
2860   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift);
2861   // The exponent contains log2 of the value in biased form.
2862   unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
2863 
2864   // For trailing zeros, we just need to subtract the bias.
2865   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
2866     return DAG.getNode(ISD::SUB, DL, VT, Trunc,
2867                        DAG.getConstant(ExponentBias, DL, VT));
2868 
2869   // For leading zeros, we need to remove the bias and convert from log2 to
2870   // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
2871   unsigned Adjust = ExponentBias + (EltSize - 1);
2872   return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc);
2873 }
2874 
2875 // While RVV has alignment restrictions, we should always be able to load as a
2876 // legal equivalently-sized byte-typed vector instead. This method is
2877 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
2878 // the load is already correctly-aligned, it returns SDValue().
2879 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
2880                                                     SelectionDAG &DAG) const {
2881   auto *Load = cast<LoadSDNode>(Op);
2882   assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
2883 
2884   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2885                                      Load->getMemoryVT(),
2886                                      *Load->getMemOperand()))
2887     return SDValue();
2888 
2889   SDLoc DL(Op);
2890   MVT VT = Op.getSimpleValueType();
2891   unsigned EltSizeBits = VT.getScalarSizeInBits();
2892   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2893          "Unexpected unaligned RVV load type");
2894   MVT NewVT =
2895       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2896   assert(NewVT.isValid() &&
2897          "Expecting equally-sized RVV vector types to be legal");
2898   SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
2899                           Load->getPointerInfo(), Load->getOriginalAlign(),
2900                           Load->getMemOperand()->getFlags());
2901   return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
2902 }
2903 
2904 // While RVV has alignment restrictions, we should always be able to store as a
2905 // legal equivalently-sized byte-typed vector instead. This method is
2906 // responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
2907 // returns SDValue() if the store is already correctly aligned.
2908 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
2909                                                      SelectionDAG &DAG) const {
2910   auto *Store = cast<StoreSDNode>(Op);
2911   assert(Store && Store->getValue().getValueType().isVector() &&
2912          "Expected vector store");
2913 
2914   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2915                                      Store->getMemoryVT(),
2916                                      *Store->getMemOperand()))
2917     return SDValue();
2918 
2919   SDLoc DL(Op);
2920   SDValue StoredVal = Store->getValue();
2921   MVT VT = StoredVal.getSimpleValueType();
2922   unsigned EltSizeBits = VT.getScalarSizeInBits();
2923   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2924          "Unexpected unaligned RVV store type");
2925   MVT NewVT =
2926       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2927   assert(NewVT.isValid() &&
2928          "Expecting equally-sized RVV vector types to be legal");
2929   StoredVal = DAG.getBitcast(NewVT, StoredVal);
2930   return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
2931                       Store->getPointerInfo(), Store->getOriginalAlign(),
2932                       Store->getMemOperand()->getFlags());
2933 }
2934 
// Central custom-lowering hook: dispatches each opcode marked Custom in the
// constructor to its dedicated lowering helper. Opcodes with no case here
// abort via report_fatal_error.
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::BRCOND:
    return lowerBRCOND(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
    EVT Op0VT = Op0.getValueType();
    MVT XLenVT = Subtarget.getXLenVT();
    if (VT.isFixedLengthVector()) {
      // We can handle fixed length vector bitcasts with a simple replacement
      // in isel.
      if (Op0VT.isFixedLengthVector())
        return Op;
      // When bitcasting from scalar to fixed-length vector, insert the scalar
      // into a one-element vector of the result type, and perform a vector
      // bitcast.
      if (!Op0VT.isVector()) {
        EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
        if (!isTypeLegal(BVT))
          return SDValue();
        return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
                                              DAG.getUNDEF(BVT), Op0,
                                              DAG.getConstant(0, DL, XLenVT)));
      }
      return SDValue();
    }
    // Custom-legalize bitcasts from fixed-length vector types to scalar types
    // thus: bitcast the vector to a one-element vector type whose element type
    // is the same as the result type, and extract the first element.
    if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
      EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
      if (!isTypeLegal(BVT))
        return SDValue();
      SDValue BVec = DAG.getBitcast(BVT, Op0);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
                         DAG.getConstant(0, DL, XLenVT));
    }
    // Scalar i16 -> f16 bitcast is done through an FMV from the GPR.
    if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    }
    // Scalar i32 -> f32 bitcast on RV64 is done through an FMV from the GPR.
    if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
        Subtarget.hasStdExtF()) {
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    if (Subtarget.hasStdExtZbp()) {
      // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining.
      // Start with the maximum immediate value which is the bitwidth - 1.
      unsigned Imm = VT.getSizeInBits() - 1;
      // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
      if (Op.getOpcode() == ISD::BSWAP)
        Imm &= ~0x7U;
      return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
                         DAG.getConstant(Imm, DL, VT));
    }
    assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
    assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
    // Expand bitreverse to a bswap(rev8) followed by brev8.
    SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
    // We use the Zbp grevi encoding for rev.b/brev8 which will be recognized
    // as brev8 by an isel pattern.
    return DAG.getNode(RISCVISD::GREV, DL, VT, BSwap,
                       DAG.getConstant(7, DL, VT));
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    MVT VT = Op.getSimpleValueType();
    assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
    SDLoc DL(Op);
    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
    // use log(XLen) bits. Mask the shift amount accordingly to prevent
    // accidentally setting the extra bit.
    unsigned ShAmtWidth = Subtarget.getXLen() - 1;
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
                                DAG.getConstant(ShAmtWidth, DL, VT));
    // fshl and fshr concatenate their operands in the same order. fsr and fsl
    // instruction use different orders. fshl will return its first operand for
    // shift of zero, fshr will return its second operand. fsl and fsr both
    // return rs1 so the ISD nodes need to have different operand orders.
    // Shift amount is in rs2.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    unsigned Opc = RISCVISD::FSL;
    if (Op.getOpcode() == ISD::FSHR) {
      std::swap(Op0, Op1);
      Opc = RISCVISD::FSR;
    }
    return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
  }
  case ISD::TRUNCATE:
    // Only custom-lower vector truncates
    if (!Op.getSimpleValueType().isVector())
      return Op;
    return lowerVectorTruncLike(Op, DAG);
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    // Extends from i1 mask vectors are handled specially (splat 1/-1).
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
  case ISD::SIGN_EXTEND:
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
  case ISD::SPLAT_VECTOR_PARTS:
    return lowerSPLAT_VECTOR_PARTS(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::VSCALE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
    static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
    if (Subtarget.getMinVLen() < RISCV::RVVBitsPerBlock)
      report_fatal_error("Support for VLEN==32 is incomplete.");
    // We assume VLENB is a multiple of 8. We manually choose the best shift
    // here because SimplifyDemandedBits isn't always able to simplify it.
    uint64_t Val = Op.getConstantOperandVal(0);
    if (isPowerOf2_64(Val)) {
      uint64_t Log2 = Log2_64(Val);
      if (Log2 < 3)
        return DAG.getNode(ISD::SRL, DL, VT, VLENB,
                           DAG.getConstant(3 - Log2, DL, VT));
      if (Log2 > 3)
        return DAG.getNode(ISD::SHL, DL, VT, VLENB,
                           DAG.getConstant(Log2 - 3, DL, VT));
      return VLENB;
    }
    // If the multiplier is a multiple of 8, scale it down to avoid needing
    // to shift the VLENB value.
    if ((Val % 8) == 0)
      return DAG.getNode(ISD::MUL, DL, VT, VLENB,
                         DAG.getConstant(Val / 8, DL, VT));

    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
                                 DAG.getConstant(3, DL, VT));
    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
  }
  case ISD::FPOWI: {
    // Custom promote f16 powi with illegal i32 integer type on RV64. Once
    // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
    if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
        Op.getOperand(1).getValueType() == MVT::i32) {
      SDLoc DL(Op);
      SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
      SDValue Powi =
          DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
      return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
                         DAG.getIntPtrConstant(0, DL));
    }
    return SDValue();
  }
  case ISD::FP_EXTEND:
  case ISD::FP_ROUND:
    if (!Op.getValueType().isVector())
      return Op;
    return lowerVectorFPExtendOrRoundLike(Op, DAG);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP: {
    // RVV can only do fp<->int conversions to types half/double the size as
    // the source. We custom-lower any conversions that do two hops into
    // sequences.
    MVT VT = Op.getSimpleValueType();
    if (!VT.isVector())
      return Op;
    SDLoc DL(Op);
    SDValue Src = Op.getOperand(0);
    MVT EltVT = VT.getVectorElementType();
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcEltVT = SrcVT.getVectorElementType();
    unsigned EltSize = EltVT.getSizeInBits();
    unsigned SrcEltSize = SrcEltVT.getSizeInBits();
    assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
           "Unexpected vector element types");

    bool IsInt2FP = SrcEltVT.isInteger();
    // Widening conversions
    if (EltSize > (2 * SrcEltSize)) {
      if (IsInt2FP) {
        // Do a regular integer sign/zero extension then convert to float.
        MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize),
                                      VT.getVectorElementCount());
        unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
                                 ? ISD::ZERO_EXTEND
                                 : ISD::SIGN_EXTEND;
        SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
        return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
      }
      // FP2Int
      assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
      // Do one doubling fp_extend then complete the operation by converting
      // to int.
      MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
      SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
      return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
    }

    // Narrowing conversions
    if (SrcEltSize > (2 * EltSize)) {
      if (IsInt2FP) {
        // One narrowing int_to_fp, then an fp_round.
        assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
        MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
        return DAG.getFPExtendOrRound(Int2FP, DL, VT);
      }
      // FP2Int
      // One narrowing fp_to_int, then truncate the integer. If the float isn't
      // representable by the integer, the result is poison.
      MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
                                    VT.getVectorElementCount());
      SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
    }

    // Scalable vectors can exit here. Patterns will handle equally-sized
    // conversions halving/doubling ones.
    if (!VT.isFixedLengthVector())
      return Op;

    // For fixed-length vectors we lower to a custom "VL" node.
    unsigned RVVOpc = 0;
    switch (Op.getOpcode()) {
    default:
      llvm_unreachable("Impossible opcode");
    case ISD::FP_TO_SINT:
      RVVOpc = RISCVISD::FP_TO_SINT_VL;
      break;
    case ISD::FP_TO_UINT:
      RVVOpc = RISCVISD::FP_TO_UINT_VL;
      break;
    case ISD::SINT_TO_FP:
      RVVOpc = RISCVISD::SINT_TO_FP_VL;
      break;
    case ISD::UINT_TO_FP:
      RVVOpc = RISCVISD::UINT_TO_FP_VL;
      break;
    }

    MVT ContainerVT, SrcContainerVT;
    // Derive the reference container type from the larger vector type.
    if (SrcEltSize > EltSize) {
      SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
      ContainerVT =
          SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    } else {
      ContainerVT = getContainerForFixedLengthVector(VT);
      SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
    }

    SDValue Mask, VL;
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
    return convertFromScalableVector(VT, Src, DAG, Subtarget);
  }
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
  case ISD::FTRUNC:
  case ISD::FCEIL:
  case ISD::FFLOOR:
    return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG);
  case ISD::FROUND:
    return lowerFROUND(Op, DAG);
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_SMIN:
    return lowerVECREDUCE(Op, DAG);
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
    // i1 mask reductions take a dedicated path.
    if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
    return lowerVECREDUCE(Op, DAG);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAX:
    return lowerFPVECREDUCE(Op, DAG);
  case ISD::VP_REDUCE_ADD:
  case ISD::VP_REDUCE_UMAX:
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_UMIN:
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_FADD:
  case ISD::VP_REDUCE_SEQ_FADD:
  case ISD::VP_REDUCE_FMIN:
  case ISD::VP_REDUCE_FMAX:
    return lowerVPREDUCE(Op, DAG);
  case ISD::VP_REDUCE_AND:
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR:
    // Operand 1 is the vector operand of a VP reduction.
    if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
    return lowerVPREDUCE(Op, DAG);
  case ISD::INSERT_SUBVECTOR:
    return lowerINSERT_SUBVECTOR(Op, DAG);
  case ISD::EXTRACT_SUBVECTOR:
    return lowerEXTRACT_SUBVECTOR(Op, DAG);
  case ISD::STEP_VECTOR:
    return lowerSTEP_VECTOR(Op, DAG);
  case ISD::VECTOR_REVERSE:
    return lowerVECTOR_REVERSE(Op, DAG);
  case ISD::VECTOR_SPLICE:
    return lowerVECTOR_SPLICE(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG, Subtarget);
  case ISD::SPLAT_VECTOR:
    if (Op.getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskSplat(Op, DAG);
    return SDValue();
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
  case ISD::CONCAT_VECTORS: {
    // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
    // better than going through the stack, as the default expansion does.
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    unsigned NumOpElts =
        Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
    SDValue Vec = DAG.getUNDEF(VT);
    for (const auto &OpIdx : enumerate(Op->ops())) {
      SDValue SubVec = OpIdx.value();
      // Don't insert undef subvectors.
      if (SubVec.isUndef())
        continue;
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
                        DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
    }
    return Vec;
  }
  case ISD::LOAD:
    if (auto V = expandUnalignedRVVLoad(Op, DAG))
      return V;
    if (Op.getValueType().isFixedLengthVector())
      return lowerFixedLengthVectorLoadToRVV(Op, DAG);
    return Op;
  case ISD::STORE:
    if (auto V = expandUnalignedRVVStore(Op, DAG))
      return V;
    if (Op.getOperand(1).getValueType().isFixedLengthVector())
      return lowerFixedLengthVectorStoreToRVV(Op, DAG);
    return Op;
  case ISD::MLOAD:
  case ISD::VP_LOAD:
    return lowerMaskedLoad(Op, DAG);
  case ISD::MSTORE:
  case ISD::VP_STORE:
    return lowerMaskedStore(Op, DAG);
  case ISD::SETCC:
    return lowerFixedLengthVectorSetccToRVV(Op, DAG);
  // Arithmetic/logic ops below are lowered to the corresponding RISCVISD
  // "*_VL" nodes (mask-vector logic ops get the VM* mask forms).
  case ISD::ADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
  case ISD::SUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
  case ISD::MUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
  case ISD::MULHS:
    return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
  case ISD::MULHU:
    return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
  case ISD::AND:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
                                              RISCVISD::AND_VL);
  case ISD::OR:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
                                              RISCVISD::OR_VL);
  case ISD::XOR:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
                                              RISCVISD::XOR_VL);
  case ISD::SDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
  case ISD::SREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
  case ISD::UDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
  case ISD::UREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    if (Op.getSimpleValueType().isFixedLengthVector())
      return lowerFixedLengthVectorShiftToRVV(Op, DAG);
    // This can be called for an i32 shift amount that needs to be promoted.
    assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    return SDValue();
  case ISD::SADDSAT:
    return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL);
  case ISD::UADDSAT:
    return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL);
  case ISD::SSUBSAT:
    return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL);
  case ISD::USUBSAT:
    return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL);
  case ISD::FADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
  case ISD::FSUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
  case ISD::FMUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
  case ISD::FDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
  case ISD::FNEG:
    return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
  case ISD::FABS:
    return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
  case ISD::FSQRT:
    return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
  case ISD::FMA:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
  case ISD::SMIN:
    return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
  case ISD::SMAX:
    return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
  case ISD::UMIN:
    return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
  case ISD::UMAX:
    return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
  case ISD::FMINNUM:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
  case ISD::FMAXNUM:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
  case ISD::ABS:
    return lowerABS(Op, DAG);
  case ISD::CTLZ_ZERO_UNDEF:
  case ISD::CTTZ_ZERO_UNDEF:
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
  case ISD::VSELECT:
    return lowerFixedLengthVectorSelectToRVV(Op, DAG);
  case ISD::FCOPYSIGN:
    return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
  case ISD::MGATHER:
  case ISD::VP_GATHER:
    return lowerMaskedGather(Op, DAG);
  case ISD::MSCATTER:
  case ISD::VP_SCATTER:
    return lowerMaskedScatter(Op, DAG);
  case ISD::FLT_ROUNDS_:
    return lowerGET_ROUNDING(Op, DAG);
  case ISD::SET_ROUNDING:
    return lowerSET_ROUNDING(Op, DAG);
  // VP (vector-predicated) operations map onto the same "*_VL" nodes via
  // lowerVPOp/lowerLogicVPOp.
  case ISD::VP_SELECT:
    return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
  case ISD::VP_MERGE:
    return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
  case ISD::VP_ADD:
    return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
  case ISD::VP_SUB:
    return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
  case ISD::VP_MUL:
    return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
  case ISD::VP_SDIV:
    return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
  case ISD::VP_UDIV:
    return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
  case ISD::VP_SREM:
    return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
  case ISD::VP_UREM:
    return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
  case ISD::VP_AND:
    return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
  case ISD::VP_OR:
    return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
  case ISD::VP_XOR:
    return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
  case ISD::VP_ASHR:
    return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
  case ISD::VP_LSHR:
    return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
  case ISD::VP_SHL:
    return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
  case ISD::VP_FADD:
    return lowerVPOp(Op, DAG, RISCVISD::FADD_VL);
  case ISD::VP_FSUB:
    return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL);
  case ISD::VP_FMUL:
    return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
  case ISD::VP_FDIV:
    return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
  case ISD::VP_FNEG:
    return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL);
  case ISD::VP_FMA:
    return lowerVPOp(Op, DAG, RISCVISD::FMA_VL);
  case ISD::VP_SIGN_EXTEND:
  case ISD::VP_ZERO_EXTEND:
    if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
      return lowerVPExtMaskOp(Op, DAG);
    return lowerVPOp(Op, DAG,
                     Op.getOpcode() == ISD::VP_SIGN_EXTEND
                         ? RISCVISD::VSEXT_VL
                         : RISCVISD::VZEXT_VL);
  case ISD::VP_TRUNCATE:
    return lowerVectorTruncLike(Op, DAG);
  case ISD::VP_FP_EXTEND:
  case ISD::VP_FP_ROUND:
    return lowerVectorFPExtendOrRoundLike(Op, DAG);
  case ISD::VP_FPTOSI:
    return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_SINT_VL);
  case ISD::VP_FPTOUI:
    return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_UINT_VL);
  case ISD::VP_SITOFP:
    return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL);
  case ISD::VP_UITOFP:
    return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL);
  case ISD::VP_SETCC:
    if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
      return lowerVPSetCCMaskOp(Op, DAG);
    return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL);
  }
}
3503 
3504 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3505                              SelectionDAG &DAG, unsigned Flags) {
3506   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3507 }
3508 
3509 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3510                              SelectionDAG &DAG, unsigned Flags) {
3511   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3512                                    Flags);
3513 }
3514 
3515 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3516                              SelectionDAG &DAG, unsigned Flags) {
3517   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3518                                    N->getOffset(), Flags);
3519 }
3520 
3521 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3522                              SelectionDAG &DAG, unsigned Flags) {
3523   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3524 }
3525 
// Compute the address of node N, selecting the addressing sequence based on
// PIC mode and the code model. IsLocal indicates the symbol is known to
// resolve within this DSO, which under PIC allows skipping the GOT.
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
    // Attach a memory operand so later passes see this as an invariant,
    // dereferenceable, pointer-sized load from the GOT.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
    return Load;
  }

  // Non-PIC: pick an absolute or PC-relative sequence by code model.
  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}
3575 
// Explicit instantiations for the node types used by the lowering routines
// below, since the template definition above is local to this file.
template SDValue RISCVTargetLowering::getAddr<GlobalAddressSDNode>(
    GlobalAddressSDNode *N, SelectionDAG &DAG, bool IsLocal) const;
template SDValue RISCVTargetLowering::getAddr<BlockAddressSDNode>(
    BlockAddressSDNode *N, SelectionDAG &DAG, bool IsLocal) const;
template SDValue RISCVTargetLowering::getAddr<ConstantPoolSDNode>(
    ConstantPoolSDNode *N, SelectionDAG &DAG, bool IsLocal) const;
template SDValue RISCVTargetLowering::getAddr<JumpTableSDNode>(
    JumpTableSDNode *N, SelectionDAG &DAG, bool IsLocal) const;
3584 
3585 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
3586                                                 SelectionDAG &DAG) const {
3587   SDLoc DL(Op);
3588   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3589   assert(N->getOffset() == 0 && "unexpected offset in global node");
3590 
3591   const GlobalValue *GV = N->getGlobal();
3592   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
3593   return getAddr(N, DAG, IsLocal);
3594 }
3595 
3596 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
3597                                                SelectionDAG &DAG) const {
3598   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
3599 
3600   return getAddr(N, DAG);
3601 }
3602 
3603 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
3604                                                SelectionDAG &DAG) const {
3605   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
3606 
3607   return getAddr(N, DAG);
3608 }
3609 
3610 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
3611                                             SelectionDAG &DAG) const {
3612   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
3613 
3614   return getAddr(N, DAG);
3615 }
3616 
// Lower a TLS global for the static TLS models. Both models compute the
// address as an offset from the thread pointer (tp/x4): with UseGOT=true
// (InitialExec) that offset is loaded from the GOT; with UseGOT=false
// (LocalExec) it is materialized directly with %tprel relocations.
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
    // Attach a memory operand so later passes see this as an invariant,
    // dereferenceable, pointer-sized load from the GOT.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}
3664 
// Lower a TLS global for the dynamic TLS models (General/LocalDynamic) by
// materializing the symbol's GOT slot address and calling __tls_get_addr.
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  // The libcall takes and returns a pointer-sized integer.
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  // The call's return value is the resolved TLS address.
  return LowerCallTo(CLI).first;
}
3696 
3697 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3698                                                    SelectionDAG &DAG) const {
3699   SDLoc DL(Op);
3700   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3701   assert(N->getOffset() == 0 && "unexpected offset in global node");
3702 
3703   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
3704 
3705   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3706       CallingConv::GHC)
3707     report_fatal_error("In GHC calling convention TLS is not supported");
3708 
3709   SDValue Addr;
3710   switch (Model) {
3711   case TLSModel::LocalExec:
3712     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
3713     break;
3714   case TLSModel::InitialExec:
3715     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
3716     break;
3717   case TLSModel::LocalDynamic:
3718   case TLSModel::GeneralDynamic:
3719     Addr = getDynamicTLSAddr(N, DAG);
3720     break;
3721   }
3722 
3723   return Addr;
3724 }
3725 
// Lower ISD::SELECT. Vector selects become VSELECTs on a splatted condition;
// scalar selects become RISCVISD::SELECT_CC, folding in a feeding SETCC when
// possible so the integer compare+branch instructions can be used.
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // Lower vector SELECTs to VSELECTs by splatting the condition.
  if (VT.isVector()) {
    MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
    SDValue CondSplat = VT.isScalableVector()
                            ? DAG.getSplatVector(SplatCondVT, DL, CondV)
                            : DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
    return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
  }

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    // Special case for a select of 2 constants that have a difference of 1.
    // Normally this is done by DAGCombine, but if the select is introduced by
    // type legalization or op legalization, we miss it. Restricting to SETLT
    // case for now because that is what signed saturating add/sub need.
    // FIXME: We don't need the condition to be SETLT or even a SETCC,
    // but we would probably want to swap the true/false values if the condition
    // is SETGE/SETLE to avoid an XORI.
    if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
        CCVal == ISD::SETLT) {
      const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
      const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
      // (select c, t, t-1) -> (add c, t-1): the i1 condition supplies the +1.
      if (TrueVal - 1 == FalseVal)
        return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
      // (select c, t, t+1) -> (sub t+1, c): the i1 condition supplies the -1.
      if (TrueVal + 1 == FalseVal)
        return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
    }

    // Canonicalize the condition into a form with a matching branch (may
    // update LHS/RHS/CCVal in place).
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    SDValue TargetCC = DAG.getCondCode(CCVal);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getCondCode(ISD::SETNE);

  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}
3790 
3791 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
3792   SDValue CondV = Op.getOperand(1);
3793   SDLoc DL(Op);
3794   MVT XLenVT = Subtarget.getXLenVT();
3795 
3796   if (CondV.getOpcode() == ISD::SETCC &&
3797       CondV.getOperand(0).getValueType() == XLenVT) {
3798     SDValue LHS = CondV.getOperand(0);
3799     SDValue RHS = CondV.getOperand(1);
3800     ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
3801 
3802     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3803 
3804     SDValue TargetCC = DAG.getCondCode(CCVal);
3805     return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3806                        LHS, RHS, TargetCC, Op.getOperand(2));
3807   }
3808 
3809   return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3810                      CondV, DAG.getConstant(0, DL, XLenVT),
3811                      DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
3812 }
3813 
3814 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3815   MachineFunction &MF = DAG.getMachineFunction();
3816   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
3817 
3818   SDLoc DL(Op);
3819   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3820                                  getPointerTy(MF.getDataLayout()));
3821 
3822   // vastart just stores the address of the VarArgsFrameIndex slot into the
3823   // memory location argument.
3824   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3825   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3826                       MachinePointerInfo(SV));
3827 }
3828 
3829 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
3830                                             SelectionDAG &DAG) const {
3831   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3832   MachineFunction &MF = DAG.getMachineFunction();
3833   MachineFrameInfo &MFI = MF.getFrameInfo();
3834   MFI.setFrameAddressIsTaken(true);
3835   Register FrameReg = RI.getFrameRegister(MF);
3836   int XLenInBytes = Subtarget.getXLen() / 8;
3837 
3838   EVT VT = Op.getValueType();
3839   SDLoc DL(Op);
3840   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3841   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3842   while (Depth--) {
3843     int Offset = -(XLenInBytes * 2);
3844     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3845                               DAG.getIntPtrConstant(Offset, DL));
3846     FrameAddr =
3847         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3848   }
3849   return FrameAddr;
3850 }
3851 
SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Record that the return address is taken so frame lowering preserves it.
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  // Give up (returning SDValue()) when the depth argument fails verification
  // as a constant.
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    // For an ancestor frame, load the saved return address from that frame:
    // it sits XLEN/8 bytes below the frame address computed by lowerFRAMEADDR.
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
3881 
3882 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
3883                                                  SelectionDAG &DAG) const {
3884   SDLoc DL(Op);
3885   SDValue Lo = Op.getOperand(0);
3886   SDValue Hi = Op.getOperand(1);
3887   SDValue Shamt = Op.getOperand(2);
3888   EVT VT = Lo.getValueType();
3889 
3890   // if Shamt-XLEN < 0: // Shamt < XLEN
3891   //   Lo = Lo << Shamt
3892   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt))
3893   // else:
3894   //   Lo = 0
3895   //   Hi = Lo << (Shamt-XLEN)
3896 
3897   SDValue Zero = DAG.getConstant(0, DL, VT);
3898   SDValue One = DAG.getConstant(1, DL, VT);
3899   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3900   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3901   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3902   SDValue XLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, XLenMinus1);
3903 
3904   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
3905   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
3906   SDValue ShiftRightLo =
3907       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
3908   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
3909   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
3910   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
3911 
3912   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3913 
3914   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
3915   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3916 
3917   SDValue Parts[2] = {Lo, Hi};
3918   return DAG.getMergeValues(Parts, DL);
3919 }
3920 
SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-XLEN);
  //     Hi = 0;
  //
  // The ((Hi << 1) << (Shamt ^ (XLEN-1))) form computes Hi << (XLEN-Shamt)
  // without requiring a shift amount of XLEN when Shamt is zero.

  // SRA and SRL expansions differ only in the opcode used for Hi and in the
  // "large shift" value of Hi (sign bits vs zero).
  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, XLenMinus1);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  // True branch taken when Shamt < XLEN.
  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
3972 
// Lower splats of i1 types to SETCC. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue SplatVal = Op.getOperand(0);
  // All-zeros or all-ones splats are handled specially.
  if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
    // All-ones mask: a single VMSET suffices.
    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
    return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
  }
  if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
    // All-zeros mask: a single VMCLR suffices.
    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
    return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
  }
  MVT XLenVT = Subtarget.getXLenVT();
  assert(SplatVal.getValueType() == XLenVT &&
         "Unexpected type for i1 splat value");
  // General case: isolate the scalar's low bit, splat it through the i8
  // intermediate type, and compare against zero to form the mask.
  MVT InterVT = VT.changeVectorElementType(MVT::i8);
  SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
                         DAG.getConstant(1, DL, XLenVT));
  SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
  SDValue Zero = DAG.getConstant(0, DL, InterVT);
  return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
}
3999 
4000 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
4001 // illegal (currently only vXi64 RV32).
4002 // FIXME: We could also catch non-constant sign-extended i32 values and lower
4003 // them to VMV_V_X_VL.
4004 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
4005                                                      SelectionDAG &DAG) const {
4006   SDLoc DL(Op);
4007   MVT VecVT = Op.getSimpleValueType();
4008   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
4009          "Unexpected SPLAT_VECTOR_PARTS lowering");
4010 
4011   assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
4012   SDValue Lo = Op.getOperand(0);
4013   SDValue Hi = Op.getOperand(1);
4014 
4015   if (VecVT.isFixedLengthVector()) {
4016     MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
4017     SDLoc DL(Op);
4018     SDValue Mask, VL;
4019     std::tie(Mask, VL) =
4020         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4021 
4022     SDValue Res =
4023         splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
4024     return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
4025   }
4026 
4027   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4028     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4029     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4030     // If Hi constant is all the same sign bit as Lo, lower this as a custom
4031     // node in order to try and match RVV vector/scalar instructions.
4032     if ((LoC >> 31) == HiC)
4033       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
4034                          Lo, DAG.getRegister(RISCV::X0, MVT::i32));
4035   }
4036 
4037   // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4038   if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4039       isa<ConstantSDNode>(Hi.getOperand(1)) &&
4040       Hi.getConstantOperandVal(1) == 31)
4041     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo,
4042                        DAG.getRegister(RISCV::X0, MVT::i32));
4043 
4044   // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
4045   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT,
4046                      DAG.getUNDEF(VecVT), Lo, Hi,
4047                      DAG.getRegister(RISCV::X0, MVT::i32));
4048 }
4049 
// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
//   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                                                int64_t ExtTrueVal) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
  // Only custom-lower extensions from mask types
  assert(Src.getValueType().isVector() &&
         Src.getValueType().getVectorElementType() == MVT::i1);

  if (VecVT.isScalableVector()) {
    // Scalable results can use a plain VSELECT between splatted constants.
    SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
    SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
    return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
  }

  // Fixed-length results: operate in the scalable container type and convert
  // back at the end.
  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);

  // Splat 0 and ExtTrueVal with VL-aware VMV_V_X, then select under the mask.
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);
  SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                             DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
                               SplatTrueVal, SplatZero, VL);

  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}
4091 
// Custom-lower a fixed-length vector extension by performing the extension on
// the corresponding scalable container types with the VL-aware ExtendOpc
// node, then converting back to the fixed-length result type.
SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
    SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
  MVT ExtVT = Op.getSimpleValueType();
  // Only custom-lower extensions from fixed-length vector types.
  if (!ExtVT.isFixedLengthVector())
    return Op;
  MVT VT = Op.getOperand(0).getSimpleValueType();
  // Grab the canonical container type for the extended type. Infer the smaller
  // type from that to ensure the same number of vector elements, as we know
  // the LMUL will be sufficient to hold the smaller type.
  MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
  // Get the extended container type manually to ensure the same number of
  // vector elements between source and dest.
  MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
                                     ContainerExtVT.getVectorElementCount());

  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);

  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
}
4119 
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
//   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
// Handles both ISD::TRUNCATE and ISD::VP_TRUNCATE (which carries an explicit
// mask and VL).
SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
                                                      SelectionDAG &DAG) const {
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();
  SDValue Mask, VL;
  // VP truncates supply their own mask and VL operands.
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;

  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      // The VP mask operand must be converted to a scalable type as well.
      MVT MaskContainerVT =
          getContainerForFixedLengthVector(Mask.getSimpleValueType());
      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
    }
  }

  // Non-VP truncates use the default mask/VL for the source type.
  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SplatOne, VL);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);

  // (and Src, 1) != 0 yields the i1 result.
  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc =
      DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
                      DAG.getCondCode(ISD::SETNE), Mask, VL);
  if (MaskVT.isFixedLengthVector())
    Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
  return Trunc;
}
4173 
// Lower a (VP_)TRUNCATE between integer vector types. RVV truncates only
// halve the element width at a time, so wider truncates become a chain of
// TRUNCATE_VECTOR_VL nodes; mask-typed results go through
// lowerVectorMaskTruncLike instead.
SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
                                                  SelectionDAG &DAG) const {
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  SDLoc DL(Op);

  MVT VT = Op.getSimpleValueType();
  // Only custom-lower vector truncates
  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  // Truncates to mask types are handled differently
  if (VT.getVectorElementType() == MVT::i1)
    return lowerVectorMaskTruncLike(Op, DAG);

  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
  // truncate by one power of two at a time.
  MVT DstEltVT = VT.getVectorElementType();

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();
  MVT SrcEltVT = SrcVT.getVectorElementType();

  assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
         isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
         "Unexpected vector truncate lowering");

  MVT ContainerVT = SrcVT;
  SDValue Mask, VL;
  // VP truncates supply their own mask and VL operands.
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  // Fixed-length sources are processed in their scalable container type.
  if (SrcVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(SrcVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  SDValue Result = Src;
  // Non-VP truncates use the default mask/VL for the source type.
  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  }

  // Halve the element width until the destination element type is reached.
  LLVMContext &Context = *DAG.getContext();
  const ElementCount Count = ContainerVT.getVectorElementCount();
  do {
    SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
    EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
    Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
                         Mask, VL);
  } while (SrcEltVT != DstEltVT);

  if (SrcVT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return Result;
}
4235 
4236 SDValue
4237 RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
4238                                                     SelectionDAG &DAG) const {
4239   bool IsVP =
4240       Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
4241   bool IsExtend =
4242       Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
4243   // RVV can only do truncate fp to types half the size as the source. We
4244   // custom-lower f64->f16 rounds via RVV's round-to-odd float
4245   // conversion instruction.
4246   SDLoc DL(Op);
4247   MVT VT = Op.getSimpleValueType();
4248 
4249   assert(VT.isVector() && "Unexpected type for vector truncate lowering");
4250 
4251   SDValue Src = Op.getOperand(0);
4252   MVT SrcVT = Src.getSimpleValueType();
4253 
4254   bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
4255                                      SrcVT.getVectorElementType() != MVT::f16);
4256   bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
4257                                      SrcVT.getVectorElementType() != MVT::f64);
4258 
4259   bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
4260 
4261   // Prepare any fixed-length vector operands.
4262   MVT ContainerVT = VT;
4263   SDValue Mask, VL;
4264   if (IsVP) {
4265     Mask = Op.getOperand(1);
4266     VL = Op.getOperand(2);
4267   }
4268   if (VT.isFixedLengthVector()) {
4269     MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
4270     ContainerVT =
4271         SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
4272     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4273     if (IsVP) {
4274       MVT MaskVT = getMaskTypeFor(ContainerVT);
4275       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
4276     }
4277   }
4278 
4279   if (!IsVP)
4280     std::tie(Mask, VL) =
4281         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4282 
4283   unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
4284 
4285   if (IsDirectConv) {
4286     Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
4287     if (VT.isFixedLengthVector())
4288       Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
4289     return Src;
4290   }
4291 
4292   unsigned InterConvOpc =
4293       IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
4294 
4295   MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
4296   SDValue IntermediateConv =
4297       DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
4298   SDValue Result =
4299       DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
4300   if (VT.isFixedLengthVector())
4301     return convertFromScalableVector(VT, Result, DAG, Subtarget);
4302   return Result;
4303 }
4304 
4305 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
4306 // first position of a vector, and that vector is slid up to the insert index.
4307 // By limiting the active vector length to index+1 and merging with the
4308 // original vector (with an undisturbed tail policy for elements >= VL), we
4309 // achieve the desired result of leaving all elements untouched except the one
4310 // at VL-1, which is replaced with the desired value.
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  // Mask vectors (i1 elements) can't be slid directly.
  if (VecVT.getVectorElementType() == MVT::i1) {
    // FIXME: For now we just promote to an i8 vector and insert into that,
    // but this is probably not optimal.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
  }

  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  // An insert is "legal" when the scalar fits in a GPR; on RV32 an i64
  // element does not and needs the split lo/hi path below.
  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
    }
  }

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // The value placed at element 0 of a vector, ready to be slid up into place.
  SDValue ValInVec;

  if (IsLegalInsert) {
    unsigned Opc =
        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
    if (isNullConstant(Idx)) {
      // Inserting at index 0: a single vmv.s.x/vfmv.s.f into the source
      // vector does the whole job; no slide is needed.
      Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
      if (!VecVT.isFixedLengthVector())
        return Vec;
      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
    }
    ValInVec =
        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
  } else {
    // On RV32, i64-element vectors must be specially handled to place the
    // value at element 0, by using two vslide1up instructions in sequence on
    // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
    // this.
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
    SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
    MVT I32ContainerVT =
        MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
    SDValue I32Mask =
        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
    // Limit the active VL to two.
    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied
    // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
    ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT,
                           DAG.getUNDEF(I32ContainerVT), Zero, InsertI64VL);
    // First slide in the hi value, then the lo in underneath it.
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT,
                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
                           I32Mask, InsertI64VL);
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT,
                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValLo,
                           I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  }

  // Now that the value is in a vector, slide it into position.
  // With VL = Idx + 1 and a tail-undisturbed slideup, only element Idx of
  // the destination is changed.
  SDValue InsertVL =
      DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                ValInVec, Idx, Mask, InsertVL);
  if (!VecVT.isFixedLengthVector())
    return Slideup;
  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}
4405 
4406 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
4407 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
4408 // types this is done using VMV_X_S to allow us to glean information about the
4409 // sign bits of the result.
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Idx = Op.getOperand(1);
  SDValue Vec = Op.getOperand(0);
  EVT EltVT = Op.getValueType();
  MVT VecVT = Vec.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  if (VecVT.getVectorElementType() == MVT::i1) {
    // Mask vectors: for fixed-length masks of at least 8 elements, bitcast
    // the mask to a vector of wider integer elements, move one element to a
    // GPR and extract the requested bit there with a shift and an AND.
    if (VecVT.isFixedLengthVector()) {
      unsigned NumElts = VecVT.getVectorNumElements();
      if (NumElts >= 8) {
        MVT WideEltVT;
        unsigned WidenVecLen;
        SDValue ExtractElementIdx;
        SDValue ExtractBitIdx;
        // The widest usable element type is limited by both ELEN and XLEN.
        unsigned MaxEEW = Subtarget.getELEN();
        MVT LargestEltVT = MVT::getIntegerVT(
            std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
        if (NumElts <= LargestEltVT.getSizeInBits()) {
          // The whole mask fits in a single wide element; the bit index is
          // just the original element index.
          assert(isPowerOf2_32(NumElts) &&
                 "the number of elements should be power of 2");
          WideEltVT = MVT::getIntegerVT(NumElts);
          WidenVecLen = 1;
          ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
          ExtractBitIdx = Idx;
        } else {
          WideEltVT = LargestEltVT;
          WidenVecLen = NumElts / WideEltVT.getSizeInBits();
          // extract element index = index / element width
          ExtractElementIdx = DAG.getNode(
              ISD::SRL, DL, XLenVT, Idx,
              DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
          // mask bit index = index % element width
          ExtractBitIdx = DAG.getNode(
              ISD::AND, DL, XLenVT, Idx,
              DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
        }
        MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
        Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
        SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
                                         Vec, ExtractElementIdx);
        // Extract the bit from GPR.
        SDValue ShiftRight =
            DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
        return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
                           DAG.getConstant(1, DL, XLenVT));
      }
    }
    // Otherwise, promote to an i8 vector and extract from that.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
  }

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // If the index is 0, the vector is already in the right position.
  if (!isNullConstant(Idx)) {
    // Use a VL of 1 to avoid processing more elements than we need.
    SDValue VL = DAG.getConstant(1, DL, XLenVT);
    SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
    Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                      DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
  }

  if (!EltVT.isInteger()) {
    // Floating-point extracts are handled in TableGen.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
                       DAG.getConstant(0, DL, XLenVT));
  }

  // Move element 0 into a GPR with vmv.x.s and truncate down to the
  // requested element type; VMV_X_S exposes the sign bits for later combines.
  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
}
4491 
4492 // Some RVV intrinsics may claim that they want an integer operand to be
4493 // promoted or expanded.
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");

  if (!Subtarget.hasVInstructions())
    return SDValue();

  // With-chain intrinsics carry the chain as operand 0, so the intrinsic ID
  // (and every other operand index) is shifted by one.
  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
  SDLoc DL(Op);

  // Only intrinsics listed in the generated table with a scalar operand need
  // any legalization work here.
  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->hasScalarOperand())
    return SDValue();

  // Translate the table's operand number into this node's operand index:
  // +1 skips the intrinsic ID, +HasChain skips the chain if present.
  unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
  assert(SplatOp < Op.getNumOperands());

  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or its type is XLenVT we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become
    // a zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // Use the previous operand to get the vXi64 VT. The result might be a mask
  // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than a scalar operand and
  // that a widening operation never uses SEW=64.
  // NOTE: If this fails the below assert, we can probably just find the
  // element count from any operand or result and use it to construct the VT.
  assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();

  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit value, we can truncate it and rely on the
  // instruction to sign-extend since SEW>XLEN.
  if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
    ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  switch (IntNo) {
  case Intrinsic::riscv_vslide1up:
  case Intrinsic::riscv_vslide1down:
  case Intrinsic::riscv_vslide1up_mask:
  case Intrinsic::riscv_vslide1down_mask: {
    // We need to special case these when the scalar is larger than XLen.
    unsigned NumOps = Op.getNumOperands();
    // 7 operands => masked form: (ID, maskedoff, vec, scalar, mask, vl,
    // policy), as consumed below.
    bool IsMasked = NumOps == 7;

    // Convert the vector source to the equivalent nxvXi32 vector.
    MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
    SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);

    // Split the 64-bit scalar into its low and high halves.
    SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp,
                                   DAG.getConstant(0, DL, XLenVT));
    SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp,
                                   DAG.getConstant(1, DL, XLenVT));

    // Double the VL since we halved SEW.
    SDValue AVL = getVLOperand(Op);
    SDValue I32VL;

    // Optimize for constant AVL
    if (isa<ConstantSDNode>(AVL)) {
      unsigned EltSize = VT.getScalarSizeInBits();
      unsigned MinSize = VT.getSizeInBits().getKnownMinValue();

      unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
      unsigned MaxVLMAX =
          RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);

      unsigned VectorBitsMin = Subtarget.getRealMinVLen();
      unsigned MinVLMAX =
          RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);

      uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
      if (AVLInt <= MinVLMAX) {
        // vl = AVL is guaranteed on any implementation; just double it.
        I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
      } else if (AVLInt >= 2 * MaxVLMAX) {
        // Just set vl to VLMAX in this situation
        RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
        SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
        unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
        SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
        SDValue SETVLMAX = DAG.getTargetConstant(
            Intrinsic::riscv_vsetvlimax_opt, DL, MVT::i32);
        I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
                            LMUL);
      } else {
        // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
        // is related to the hardware implementation.
        // So fall through to the vsetvli-based sequence below.
      }
    }
    if (!I32VL) {
      RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
      SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
      unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
      SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
      SDValue SETVL =
          DAG.getTargetConstant(Intrinsic::riscv_vsetvli_opt, DL, MVT::i32);
      // Using vsetvli instruction to get actually used length which related to
      // the hardware implementation
      SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
                               SEW, LMUL);
      I32VL =
          DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
    }

    SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);

    // Shift the two scalar parts in using SEW=32 slide1up/slide1down
    // instructions.
    SDValue Passthru;
    if (IsMasked)
      Passthru = DAG.getUNDEF(I32VT);
    else
      Passthru = DAG.getBitcast(I32VT, Operands[1]);

    if (IntNo == Intrinsic::riscv_vslide1up ||
        IntNo == Intrinsic::riscv_vslide1up_mask) {
      // slide1up shifts hi in first so lo ends up below it, preserving the
      // little-endian element layout of the i64 value.
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
                        ScalarHi, I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
                        ScalarLo, I32Mask, I32VL);
    } else {
      // slide1down shifts lo in first for the same reason.
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
                        ScalarLo, I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
                        ScalarHi, I32Mask, I32VL);
    }

    // Convert back to nxvXi64.
    Vec = DAG.getBitcast(VT, Vec);

    if (!IsMasked)
      return Vec;
    // Apply mask after the operation.
    SDValue Mask = Operands[NumOps - 3];
    SDValue MaskedOff = Operands[1];
    // Assume Policy operand is the last operand.
    uint64_t Policy =
        cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue();
    // We don't need to select maskedoff if it's undef.
    if (MaskedOff.isUndef())
      return Vec;
    // TAMU
    if (Policy == RISCVII::TAIL_AGNOSTIC)
      return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff,
                         AVL);
    // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
    // It's fine because vmerge does not care mask policy.
    return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff,
                       AVL);
  }
  }

  // We need to convert the scalar to a splat vector.
  SDValue VL = getVLOperand(Op);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
4679 
// Custom-lower ISD::INTRINSIC_WO_CHAIN nodes. Intrinsics not handled in the
// switch fall through to lowerVectorIntrinsicScalars, which legalizes
// illegal scalar operands of RVV intrinsics.
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(0);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  switch (IntNo) {
  default:
    break; // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    // The thread pointer lives in tp (x4).
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_orc_b:
  case Intrinsic::riscv_brev8: {
    // Lower to the GORCI encoding for orc.b or the GREVI encoding for brev8.
    unsigned Opc =
        IntNo == Intrinsic::riscv_brev8 ? RISCVISD::GREV : RISCVISD::GORC;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1),
                       DAG.getConstant(7, DL, XLenVT));
  }
  case Intrinsic::riscv_grev:
  case Intrinsic::riscv_gorc: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::riscv_zip:
  case Intrinsic::riscv_unzip: {
    // Lower to the SHFLI encoding for zip or the UNSHFLI encoding for unzip.
    // For i32 the immediate is 15. For i64 the immediate is 31.
    unsigned Opc =
        IntNo == Intrinsic::riscv_zip ? RISCVISD::SHFL : RISCVISD::UNSHFL;
    unsigned BitWidth = Op.getValueSizeInBits();
    assert(isPowerOf2_32(BitWidth) && BitWidth >= 2 && "Unexpected bit width");
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1),
                       DAG.getConstant((BitWidth / 2) - 1, DL, XLenVT));
  }
  case Intrinsic::riscv_shfl:
  case Intrinsic::riscv_unshfl: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::riscv_bcompress:
  case Intrinsic::riscv_bdecompress: {
    unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? RISCVISD::BCOMPRESS
                                                       : RISCVISD::BDECOMPRESS;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::riscv_bfp:
    return DAG.getNode(RISCVISD::BFP, DL, XLenVT, Op.getOperand(1),
                       Op.getOperand(2));
  case Intrinsic::riscv_fsl:
    return DAG.getNode(RISCVISD::FSL, DL, XLenVT, Op.getOperand(1),
                       Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::riscv_fsr:
    return DAG.getNode(RISCVISD::FSR, DL, XLenVT, Op.getOperand(1),
                       Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::riscv_vmv_x_s:
    assert(Op.getValueType() == XLenVT && "Unexpected VT!");
    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                       Op.getOperand(1));
  case Intrinsic::riscv_vmv_v_x:
    // Operands: (passthru, scalar, vl).
    return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
                            Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
                            Subtarget);
  case Intrinsic::riscv_vfmv_v_f:
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::riscv_vmv_s_x: {
    SDValue Scalar = Op.getOperand(2);

    // Scalars that fit in a GPR map directly to vmv.s.x.
    if (Scalar.getValueType().bitsLE(XLenVT)) {
      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
                         Op.getOperand(1), Scalar, Op.getOperand(3));
    }

    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // This is an i64 value that lives in two scalar registers. We have to
    // insert this in a convoluted way. First we build vXi64 splat containing
    // the two values that we assemble using some bit math. Next we'll use
    // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
    // to merge element 0 from our splat into the source vector.
    // FIXME: This is probably not the best way to do this, but it is
    // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
    // point.
    //   sw lo, (a0)
    //   sw hi, 4(a0)
    //   vlse vX, (a0)
    //
    //   vid.v      vVid
    //   vmseq.vx   mMask, vVid, 0
    //   vmerge.vvm vDest, vSrc, vVal, mMask
    MVT VT = Op.getSimpleValueType();
    SDValue Vec = Op.getOperand(1);
    SDValue VL = getVLOperand(Op);

    SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
    // If the passthru is undef, the splat alone is the whole result.
    if (Op.getOperand(1).isUndef())
      return SplattedVal;
    SDValue SplattedIdx =
        DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                    DAG.getConstant(0, DL, MVT::i32), VL);

    MVT MaskVT = getMaskTypeFor(VT);
    SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
    SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
    SDValue SelectCond =
        DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
                    DAG.getCondCode(ISD::SETEQ), Mask, VL);
    return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
                       Vec, VL);
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
4800 
// Custom-lower ISD::INTRINSIC_W_CHAIN nodes (memory intrinsics). Unhandled
// intrinsics fall through to lowerVectorIntrinsicScalars.
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(1);
  switch (IntNo) {
  default:
    break;
  case Intrinsic::riscv_masked_strided_load: {
    // Lower to a (possibly unmasked) vlse memory intrinsic on the scalable
    // container type.
    SDLoc DL(Op);
    MVT XLenVT = Subtarget.getXLenVT();

    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
    // the selection of the masked intrinsics doesn't do this for us.
    SDValue Mask = Op.getOperand(5);
    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

    MVT VT = Op->getSimpleValueType(0);
    MVT ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue PassThru = Op.getOperand(2);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
      PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    }

    // VL is the fixed element count of the result type.
    SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);

    SDValue IntID = DAG.getTargetConstant(
        IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
        XLenVT);

    // Assemble the vlse/vlse_mask operand list: chain, ID, passthru, ptr,
    // stride, [mask,] vl, [policy].
    auto *Load = cast<MemIntrinsicSDNode>(Op);
    SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
    if (IsUnmasked)
      Ops.push_back(DAG.getUNDEF(ContainerVT));
    else
      Ops.push_back(PassThru);
    Ops.push_back(Op.getOperand(3)); // Ptr
    Ops.push_back(Op.getOperand(4)); // Stride
    if (!IsUnmasked)
      Ops.push_back(Mask);
    Ops.push_back(VL);
    if (!IsUnmasked) {
      SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
      Ops.push_back(Policy);
    }

    SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
    SDValue Result =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                Load->getMemoryVT(), Load->getMemOperand());
    SDValue Chain = Result.getValue(1);
    // Convert back to the original fixed-length type and return both the
    // value and the chain.
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load: {
    // Lower fixed-length segment loads to the corresponding vlsegN intrinsic
    // on scalable container types.
    SDLoc DL(Op);
    static const Intrinsic::ID VlsegInts[7] = {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};
    // NF = number of vector results (last result is the chain).
    unsigned NF = Op->getNumValues() - 1;
    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
    MVT XLenVT = Subtarget.getXLenVT();
    MVT VT = Op->getSimpleValueType(0);
    MVT ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
    SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
    auto *Load = cast<MemIntrinsicSDNode>(Op);
    SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
    ContainerVTs.push_back(MVT::Other);
    SDVTList VTs = DAG.getVTList(ContainerVTs);
    // Operands: chain, ID, NF undef passthrus, pointer, vl.
    SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
    Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
    Ops.push_back(Op.getOperand(2));
    Ops.push_back(VL);
    SDValue Result =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                Load->getMemoryVT(), Load->getMemOperand());
    // Convert each scalable result back to fixed-length, then append the
    // chain.
    SmallVector<SDValue, 9> Results;
    for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
      Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
                                                  DAG, Subtarget));
    Results.push_back(Result.getValue(NF));
    return DAG.getMergeValues(Results, DL);
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
4899 
// Custom-lower ISD::INTRINSIC_VOID nodes. Only riscv_masked_strided_store is
// handled here; all other intrinsics return SDValue() to get the default
// handling.
SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // Operand 0 is the chain; operand 1 holds the intrinsic ID.
  unsigned IntNo = Op.getConstantOperandVal(1);
  switch (IntNo) {
  default:
    break;
  case Intrinsic::riscv_masked_strided_store: {
    SDLoc DL(Op);
    MVT XLenVT = Subtarget.getXLenVT();

    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
    // the selection of the masked intrinsics doesn't do this for us.
    SDValue Mask = Op.getOperand(5);
    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

    // The stored value and its fixed-length type; operate on the equivalent
    // scalable container type.
    SDValue Val = Op.getOperand(2);
    MVT VT = Val.getSimpleValueType();
    MVT ContainerVT = getContainerForFixedLengthVector(VT);

    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }

    // Only store the original fixed-length number of elements.
    SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);

    // Re-emit as a (masked) strided-store intrinsic, vsse/vsse.mask.
    SDValue IntID = DAG.getTargetConstant(
        IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
        XLenVT);

    auto *Store = cast<MemIntrinsicSDNode>(Op);
    SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
    Ops.push_back(Val);
    Ops.push_back(Op.getOperand(3)); // Ptr
    Ops.push_back(Op.getOperand(4)); // Stride
    if (!IsUnmasked)
      Ops.push_back(Mask);
    Ops.push_back(VL);

    // Preserve the original memory VT and memory operand on the new node.
    return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
                                   Ops, Store->getMemoryVT(),
                                   Store->getMemOperand());
  }
  }

  return SDValue();
}
4948 
4949 static MVT getLMUL1VT(MVT VT) {
4950   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
4951          "Unexpected vector MVT");
4952   return MVT::getScalableVectorVT(
4953       VT.getVectorElementType(),
4954       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
4955 }
4956 
4957 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
4958   switch (ISDOpcode) {
4959   default:
4960     llvm_unreachable("Unhandled reduction");
4961   case ISD::VECREDUCE_ADD:
4962     return RISCVISD::VECREDUCE_ADD_VL;
4963   case ISD::VECREDUCE_UMAX:
4964     return RISCVISD::VECREDUCE_UMAX_VL;
4965   case ISD::VECREDUCE_SMAX:
4966     return RISCVISD::VECREDUCE_SMAX_VL;
4967   case ISD::VECREDUCE_UMIN:
4968     return RISCVISD::VECREDUCE_UMIN_VL;
4969   case ISD::VECREDUCE_SMIN:
4970     return RISCVISD::VECREDUCE_SMIN_VL;
4971   case ISD::VECREDUCE_AND:
4972     return RISCVISD::VECREDUCE_AND_VL;
4973   case ISD::VECREDUCE_OR:
4974     return RISCVISD::VECREDUCE_OR_VL;
4975   case ISD::VECREDUCE_XOR:
4976     return RISCVISD::VECREDUCE_XOR_VL;
4977   }
4978 }
4979 
// Lower a reduction of an i1 mask vector (VECREDUCE_AND/OR/XOR, or the
// VP_REDUCE_* equivalents when IsVP is set) by counting set bits with vcpop
// and comparing the count against zero.
SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
                                                         SelectionDAG &DAG,
                                                         bool IsVP) const {
  SDLoc DL(Op);
  // VP reductions carry (start, vector, mask, evl); non-VP reductions have
  // the vector as operand 0.
  SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
  MVT VecVT = Vec.getSimpleValueType();
  assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
          Op.getOpcode() == ISD::VECREDUCE_OR ||
          Op.getOpcode() == ISD::VECREDUCE_XOR ||
          Op.getOpcode() == ISD::VP_REDUCE_AND ||
          Op.getOpcode() == ISD::VP_REDUCE_OR ||
          Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
         "Unexpected reduction lowering");

  MVT XLenVT = Subtarget.getXLenVT();
  assert(Op.getValueType() == XLenVT &&
         "Expected reduction output to be legalized to XLenVT");

  // Fixed-length vectors are handled in their scalable container type.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // VP operations supply their own mask and EVL operands; otherwise use the
  // defaults for the vector type.
  SDValue Mask, VL;
  if (IsVP) {
    Mask = Op.getOperand(2);
    VL = Op.getOperand(3);
  } else {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  // BaseOpc is used below to fold in the VP start value; CC compares the
  // popcount against zero.
  unsigned BaseOpc;
  ISD::CondCode CC;
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_AND:
  case ISD::VP_REDUCE_AND: {
    // vcpop ~x == 0
    SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
    Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETEQ;
    BaseOpc = ISD::AND;
    break;
  }
  case ISD::VECREDUCE_OR:
  case ISD::VP_REDUCE_OR:
    // vcpop x != 0
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETNE;
    BaseOpc = ISD::OR;
    break;
  case ISD::VECREDUCE_XOR:
  case ISD::VP_REDUCE_XOR: {
    // ((vcpop x) & 1) != 0
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
    CC = ISD::SETNE;
    BaseOpc = ISD::XOR;
    break;
  }
  }

  SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);

  if (!IsVP)
    return SetCC;

  // Now include the start value in the operation.
  // Note that we must return the start value when no elements are operated
  // upon. The vcpop instructions we've emitted in each case above will return
  // 0 for an inactive vector, and so we've already received the neutral value:
  // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
  // can simply include the start value.
  return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
}
5062 
// Lower an integer VECREDUCE_* node to the matching RISCVISD::VECREDUCE_*_VL
// node. The scalar result is read back from element 0 of the LMUL=1 result
// vector and sign-extended/truncated to the requested result type.
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  EVT VecEVT = Vec.getValueType();

  unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());

  // Due to ordering in legalize types we may have a vector type that needs to
  // be split. Do that manually so we can get down to a legal type.
  while (getTypeAction(*DAG.getContext(), VecEVT) ==
         TargetLowering::TypeSplitVector) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
    VecEVT = Lo.getValueType();
    // Combine the halves with the scalar base opcode and keep reducing.
    Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
  }

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  MVT VecEltVT = VecVT.getVectorElementType();
  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());

  // Fixed-length vectors are handled in their scalable container type.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT M1VT = getLMUL1VT(ContainerVT);
  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // Use a one-element splat of the operation's neutral element as the
  // reduction's start value.
  SDValue NeutralElem =
      DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
  SDValue IdentitySplat =
      lowerScalarSplat(SDValue(), NeutralElem, DAG.getConstant(1, DL, XLenVT),
                       M1VT, DL, DAG, Subtarget);
  SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec,
                                  IdentitySplat, Mask, VL);
  SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
                             DAG.getConstant(0, DL, XLenVT));
  return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
}
5113 
5114 // Given a reduction op, this function returns the matching reduction opcode,
5115 // the vector SDValue and the scalar SDValue required to lower this to a
5116 // RISCVISD node.
5117 static std::tuple<unsigned, SDValue, SDValue>
5118 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
5119   SDLoc DL(Op);
5120   auto Flags = Op->getFlags();
5121   unsigned Opcode = Op.getOpcode();
5122   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
5123   switch (Opcode) {
5124   default:
5125     llvm_unreachable("Unhandled reduction");
5126   case ISD::VECREDUCE_FADD: {
5127     // Use positive zero if we can. It is cheaper to materialize.
5128     SDValue Zero =
5129         DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
5130     return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
5131   }
5132   case ISD::VECREDUCE_SEQ_FADD:
5133     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
5134                            Op.getOperand(0));
5135   case ISD::VECREDUCE_FMIN:
5136     return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
5137                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
5138   case ISD::VECREDUCE_FMAX:
5139     return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
5140                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
5141   }
5142 }
5143 
// Lower a floating-point VECREDUCE_* node: select the RISCVISD opcode, the
// vector operand and the start scalar via getRVVFPReductionOpAndOperands,
// splat the scalar into an LMUL=1 vector, emit the reduction, and read the
// result out of element 0.
SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // The node's result type is the (scalar) element type being reduced.
  MVT VecEltVT = Op.getSimpleValueType();

  unsigned RVVOpcode;
  SDValue VectorVal, ScalarVal;
  std::tie(RVVOpcode, VectorVal, ScalarVal) =
      getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
  MVT VecVT = VectorVal.getSimpleValueType();

  // Fixed-length vectors are handled in their scalable container type.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
  }

  MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  // Splat the start scalar into a one-element LMUL=1 vector.
  SDValue ScalarSplat =
      lowerScalarSplat(SDValue(), ScalarVal, DAG.getConstant(1, DL, XLenVT),
                       M1VT, DL, DAG, Subtarget);
  SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT),
                                  VectorVal, ScalarSplat, Mask, VL);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
                     DAG.getConstant(0, DL, XLenVT));
}
5175 
5176 static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
5177   switch (ISDOpcode) {
5178   default:
5179     llvm_unreachable("Unhandled reduction");
5180   case ISD::VP_REDUCE_ADD:
5181     return RISCVISD::VECREDUCE_ADD_VL;
5182   case ISD::VP_REDUCE_UMAX:
5183     return RISCVISD::VECREDUCE_UMAX_VL;
5184   case ISD::VP_REDUCE_SMAX:
5185     return RISCVISD::VECREDUCE_SMAX_VL;
5186   case ISD::VP_REDUCE_UMIN:
5187     return RISCVISD::VECREDUCE_UMIN_VL;
5188   case ISD::VP_REDUCE_SMIN:
5189     return RISCVISD::VECREDUCE_SMIN_VL;
5190   case ISD::VP_REDUCE_AND:
5191     return RISCVISD::VECREDUCE_AND_VL;
5192   case ISD::VP_REDUCE_OR:
5193     return RISCVISD::VECREDUCE_OR_VL;
5194   case ISD::VP_REDUCE_XOR:
5195     return RISCVISD::VECREDUCE_XOR_VL;
5196   case ISD::VP_REDUCE_FADD:
5197     return RISCVISD::VECREDUCE_FADD_VL;
5198   case ISD::VP_REDUCE_SEQ_FADD:
5199     return RISCVISD::VECREDUCE_SEQ_FADD_VL;
5200   case ISD::VP_REDUCE_FMAX:
5201     return RISCVISD::VECREDUCE_FMAX_VL;
5202   case ISD::VP_REDUCE_FMIN:
5203     return RISCVISD::VECREDUCE_FMIN_VL;
5204   }
5205 }
5206 
// Lower a VP_REDUCE_* node. The start value (operand 0) is splatted into an
// LMUL=1 vector, which is passed both as the merge operand and as the start
// operand of the RISCVISD reduction node; the mask and EVL come straight from
// the VP operands.
SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(1);
  EVT VecEVT = Vec.getValueType();

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  MVT VecEltVT = VecVT.getVectorElementType();
  unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode());

  // Fixed-length vectors are handled in their scalable container type.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue VL = Op.getOperand(3);
  SDValue Mask = Op.getOperand(2);

  MVT M1VT = getLMUL1VT(ContainerVT);
  MVT XLenVT = Subtarget.getXLenVT();
  // Extract in the element type, except for integers narrower than XLen,
  // which are extracted as XLenVT and sign-extended/truncated below.
  MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT;

  SDValue StartSplat = lowerScalarSplat(SDValue(), Op.getOperand(0),
                                        DAG.getConstant(1, DL, XLenVT), M1VT,
                                        DL, DAG, Subtarget);
  SDValue Reduction =
      DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL);
  SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
                             DAG.getConstant(0, DL, XLenVT));
  if (!VecVT.isInteger())
    return Elt0;
  return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
}
5246 
// Lower INSERT_SUBVECTOR. Register-aligned inserts are left for subregister
// manipulation; all other cases are lowered to a VSLIDEUP into (the relevant
// LMUL=1 slice of) the destination vector. Mask (i1) vectors are first
// re-expressed as i8 vectors, or extended/truncated when that isn't possible.
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  MVT VecVT = Vec.getSimpleValueType();
  MVT SubVecVT = SubVec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(2);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 &&
      (OrigIdx != 0 || !Vec.isUndef())) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
      SubVec = DAG.getBitcast(SubVecVT, SubVec);
    } else {
      // We can't slide this mask vector up indexed by its i1 elements.
      // This poses a problem when we wish to insert a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
                        Op.getOperand(2));
      // Truncate back to i1 by comparing against zero.
      SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
      return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group up the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    // Make the subvector scalable by inserting it at index 0 of an undef
    // container.
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SubVec,
                         DAG.getConstant(0, DL, XLenVT));
    if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
      return DAG.getBitcast(Op.getValueType(), SubVec);
    }
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    SDValue VL =
        DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
    SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                  SubVec, SlideupAmt, Mask, VL);
    if (VecVT.isFixedLengthVector())
      Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
    return DAG.getBitcast(Op.getValueType(), Slideup);
  }

  // Decompose the index into a subregister index plus a remaining element
  // offset within that subregister.
  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
  bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F8;

  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.
  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
    return Op;

  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  MVT InterSubVT = VecVT;
  SDValue AlignedExtract = Vec;
  unsigned AlignedIdx = OrigIdx - RemIdx;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a EXTRACT_SUBREG instruction.
    AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                                 DAG.getConstant(AlignedIdx, DL, XLenVT));
  }

  SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
  // For scalable vectors this must be further multiplied by vscale.
  SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Construct the vector length corresponding to RemIdx + length(SubVecVT).
  VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
  VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
  VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);

  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
                       DAG.getUNDEF(InterSubVT), SubVec,
                       DAG.getConstant(0, DL, XLenVT));

  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
                                AlignedExtract, SubVec, SlideupAmt, Mask, VL);

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (VecVT.bitsGT(InterSubVT))
    Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
                          DAG.getConstant(AlignedIdx, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
}
5400 
// Lower EXTRACT_SUBVECTOR. Register-aligned extracts are left for subregister
// manipulation; all other cases slide the source vector down with VSLIDEDOWN
// so the desired subvector starts at element 0, then perform a cast-like
// extract. Mask (i1) vectors are first re-expressed as i8 vectors, or
// extended/truncated when that isn't possible.
SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  MVT SubVecVT = Op.getSimpleValueType();
  MVT VecVT = Vec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(1);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors down indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when extracting a fixed-length vector
  // from a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
    } else {
      // We can't slide this mask vector down, indexed by its i1 elements.
      // This poses a problem when we wish to extract a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting certain fixed
      // from fixed, where we can extract as i8 and shift the correct element
      // right to reach the desired subvector?
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
                        Op.getOperand(1));
      // Truncate back to i1 by comparing against zero.
      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector vector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group down the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    // With an index of 0 this is a cast-like subvector, which can be performed
    // with subregister operations.
    if (OrigIdx == 0)
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. This
    // avoids sliding down elements we're going to discard straight away.
    SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
    SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slidedown =
        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                    DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
    // Now we can use a cast-like subvector extract to get the result.
    Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                            DAG.getConstant(0, DL, XLenVT));
    return DAG.getBitcast(Op.getValueType(), Slidedown);
  }

  // Decompose the index into a subregister index plus a remaining element
  // offset within that subregister.
  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation.
  if (RemIdx == 0)
    return Op;

  // Else we must shift our vector register directly to extract the subvector.
  // Do this using VSLIDEDOWN.

  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type. This should resolve to a EXTRACT_SUBREG
  // instruction.
  MVT InterSubVT = VecVT;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                      DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
  }

  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
  SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
  // For scalable vectors this must be further multiplied by vscale.
  SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
                  DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                          DAG.getConstant(0, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}
5524 
5525 // Lower step_vector to the vid instruction. Any non-identity step value must
5526 // be accounted for my manual expansion.
5527 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
5528                                               SelectionDAG &DAG) const {
5529   SDLoc DL(Op);
5530   MVT VT = Op.getSimpleValueType();
5531   MVT XLenVT = Subtarget.getXLenVT();
5532   SDValue Mask, VL;
5533   std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
5534   SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
5535   uint64_t StepValImm = Op.getConstantOperandVal(0);
5536   if (StepValImm != 1) {
5537     if (isPowerOf2_64(StepValImm)) {
5538       SDValue StepVal =
5539           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
5540                       DAG.getConstant(Log2_64(StepValImm), DL, XLenVT));
5541       StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
5542     } else {
5543       SDValue StepVal = lowerScalarSplat(
5544           SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
5545           VL, VT, DL, DAG, Subtarget);
5546       StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
5547     }
5548   }
5549   return StepVec;
5550 }
5551 
// Implement vector_reverse using vrgather.vv with indices determined by
// subtracting the id of each element from (VLMAX-1). This will convert
// the indices like so:
// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  unsigned EltSize = VecVT.getScalarSizeInBits();
  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();

  // MaxVLMAX stays 0 ("unknown") unless an upper bound on the vector length
  // was specified on the subtarget.
  unsigned MaxVLMAX = 0;
  unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
  if (VectorBitsMax != 0)
    MaxVLMAX =
        RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);

  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IntVT = VecVT.changeVectorElementTypeToInteger();

  // If this is SEW=8 and VLMAX is unknown or more than 256, we need
  // to use vrgatherei16.vv.
  // TODO: It's also possible to use vrgatherei16.vv for other types to
  // decrease register width for the index calculation.
  if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
    // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
    // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that after splitting that VLMAX no longer
    // requires vrgatherei16.vv.
    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
      EVT LoVT, HiVT;
      std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
      Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
      Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
      // Reassemble the low and high pieces reversed.
      // FIXME: This is a CONCAT_VECTORS.
      SDValue Res =
          DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
                      DAG.getIntPtrConstant(0, DL));
      return DAG.getNode(
          ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
          DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
    }

    // Just promote the int type to i16 which will double the LMUL.
    IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
  }

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Calculate VLMAX-1 for the desired SEW.
  unsigned MinElts = VecVT.getVectorMinNumElements();
  SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
                              DAG.getConstant(MinElts, DL, XLenVT));
  SDValue VLMinus1 =
      DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));

  // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
  // On RV32 with i64 elements, getSplatVector can't be used directly, so
  // build a VMV_V_X_VL node with X0 as its VL operand instead.
  bool IsRV32E64 =
      !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
  SDValue SplatVL;
  if (!IsRV32E64)
    SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
  else
    SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
                          VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));

  // Indices = (VLMAX-1) - vid, i.e. the element ids in reverse.
  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
  SDValue Indices =
      DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);

  return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
}
5631 
5632 SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
5633                                                 SelectionDAG &DAG) const {
5634   SDLoc DL(Op);
5635   SDValue V1 = Op.getOperand(0);
5636   SDValue V2 = Op.getOperand(1);
5637   MVT XLenVT = Subtarget.getXLenVT();
5638   MVT VecVT = Op.getSimpleValueType();
5639 
5640   unsigned MinElts = VecVT.getVectorMinNumElements();
5641   SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
5642                               DAG.getConstant(MinElts, DL, XLenVT));
5643 
5644   int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
5645   SDValue DownOffset, UpOffset;
5646   if (ImmValue >= 0) {
5647     // The operand is a TargetConstant, we need to rebuild it as a regular
5648     // constant.
5649     DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
5650     UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
5651   } else {
5652     // The operand is a TargetConstant, we need to rebuild it as a regular
5653     // constant rather than negating the original operand.
5654     UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
5655     DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
5656   }
5657 
5658   SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
5659 
5660   SDValue SlideDown =
5661       DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT, DAG.getUNDEF(VecVT), V1,
5662                   DownOffset, TrueMask, UpOffset);
5663   return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VecVT, SlideDown, V2, UpOffset,
5664                      TrueMask,
5665                      DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT));
5666 }
5667 
SDValue
RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
                                                     SelectionDAG &DAG) const {
  // Lower a fixed-length vector load by emitting a VL-limited vle/vlm
  // intrinsic on the equivalent scalable container type, then "casting" the
  // result back to the fixed-length type.
  SDLoc DL(Op);
  auto *Load = cast<LoadSDNode>(Op);

  // Misaligned accesses are expected to have been legalized before we get
  // here.
  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                        Load->getMemoryVT(),
                                        *Load->getMemOperand()) &&
         "Expecting a correctly-aligned load");

  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  // VL is the exact number of fixed-length elements to load.
  SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);

  // Mask (i1) vectors use vlm, which takes no passthru operand; all other
  // element types use vle with an undef passthru.
  bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
  SDValue IntID = DAG.getTargetConstant(
      IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
  SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
  if (!IsMaskOp)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  Ops.push_back(Load->getBasePtr());
  Ops.push_back(VL);
  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue NewLoad =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                              Load->getMemoryVT(), Load->getMemOperand());

  // Return both the fixed-length result and the chain of the new load.
  SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
  return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
}
5701 
5702 SDValue
5703 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
5704                                                       SelectionDAG &DAG) const {
5705   SDLoc DL(Op);
5706   auto *Store = cast<StoreSDNode>(Op);
5707 
5708   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5709                                         Store->getMemoryVT(),
5710                                         *Store->getMemOperand()) &&
5711          "Expecting a correctly-aligned store");
5712 
5713   SDValue StoreVal = Store->getValue();
5714   MVT VT = StoreVal.getSimpleValueType();
5715   MVT XLenVT = Subtarget.getXLenVT();
5716 
5717   // If the size less than a byte, we need to pad with zeros to make a byte.
5718   if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
5719     VT = MVT::v8i1;
5720     StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
5721                            DAG.getConstant(0, DL, VT), StoreVal,
5722                            DAG.getIntPtrConstant(0, DL));
5723   }
5724 
5725   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5726 
5727   SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
5728 
5729   SDValue NewValue =
5730       convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
5731 
5732   bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
5733   SDValue IntID = DAG.getTargetConstant(
5734       IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
5735   return DAG.getMemIntrinsicNode(
5736       ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
5737       {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
5738       Store->getMemoryVT(), Store->getMemOperand());
5739 }
5740 
SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
                                             SelectionDAG &DAG) const {
  // Lower MLOAD and VP_LOAD to the riscv_vle/riscv_vle_mask intrinsic form,
  // converting any fixed-length types to their scalable container types.
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  const auto *MemSD = cast<MemSDNode>(Op);
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  SDValue Mask, PassThru, VL;
  if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
    Mask = VPLoad->getMask();
    // VP loads carry no passthru; masked-off lanes are undef.
    PassThru = DAG.getUNDEF(VT);
    VL = VPLoad->getVectorLength();
  } else {
    // MLOAD provides no explicit VL; it is defaulted below.
    const auto *MLoad = cast<MaskedLoadSDNode>(Op);
    Mask = MLoad->getMask();
    PassThru = MLoad->getPassThru();
  }

  // A constant all-ones mask lets us select the cheaper unmasked intrinsic.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  // Build the intrinsic operand list. The push order below defines the
  // layout: chain, intrinsic-id, passthru, pointer, [mask,] VL, [policy].
  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (IsUnmasked)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  else
    Ops.push_back(PassThru);
  Ops.push_back(BasePtr);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);
  if (!IsUnmasked)
    // Masked form carries a tail-policy operand.
    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});

  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
  Chain = Result.getValue(1);

  // Convert the loaded container value back to the fixed-length type.
  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
5805 
5806 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
5807                                               SelectionDAG &DAG) const {
5808   SDLoc DL(Op);
5809 
5810   const auto *MemSD = cast<MemSDNode>(Op);
5811   EVT MemVT = MemSD->getMemoryVT();
5812   MachineMemOperand *MMO = MemSD->getMemOperand();
5813   SDValue Chain = MemSD->getChain();
5814   SDValue BasePtr = MemSD->getBasePtr();
5815   SDValue Val, Mask, VL;
5816 
5817   if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
5818     Val = VPStore->getValue();
5819     Mask = VPStore->getMask();
5820     VL = VPStore->getVectorLength();
5821   } else {
5822     const auto *MStore = cast<MaskedStoreSDNode>(Op);
5823     Val = MStore->getValue();
5824     Mask = MStore->getMask();
5825   }
5826 
5827   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
5828 
5829   MVT VT = Val.getSimpleValueType();
5830   MVT XLenVT = Subtarget.getXLenVT();
5831 
5832   MVT ContainerVT = VT;
5833   if (VT.isFixedLengthVector()) {
5834     ContainerVT = getContainerForFixedLengthVector(VT);
5835 
5836     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
5837     if (!IsUnmasked) {
5838       MVT MaskVT = getMaskTypeFor(ContainerVT);
5839       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
5840     }
5841   }
5842 
5843   if (!VL)
5844     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
5845 
5846   unsigned IntID =
5847       IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
5848   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
5849   Ops.push_back(Val);
5850   Ops.push_back(BasePtr);
5851   if (!IsUnmasked)
5852     Ops.push_back(Mask);
5853   Ops.push_back(VL);
5854 
5855   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
5856                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
5857 }
5858 
5859 SDValue
5860 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
5861                                                       SelectionDAG &DAG) const {
5862   MVT InVT = Op.getOperand(0).getSimpleValueType();
5863   MVT ContainerVT = getContainerForFixedLengthVector(InVT);
5864 
5865   MVT VT = Op.getSimpleValueType();
5866 
5867   SDValue Op1 =
5868       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
5869   SDValue Op2 =
5870       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
5871 
5872   SDLoc DL(Op);
5873   SDValue VL =
5874       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
5875 
5876   MVT MaskVT = getMaskTypeFor(ContainerVT);
5877   SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
5878 
5879   SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
5880                             Op.getOperand(2), Mask, VL);
5881 
5882   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
5883 }
5884 
5885 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
5886     SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
5887   MVT VT = Op.getSimpleValueType();
5888 
5889   if (VT.getVectorElementType() == MVT::i1)
5890     return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
5891 
5892   return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
5893 }
5894 
5895 SDValue
5896 RISCVTargetLowering::lowerFixedLengthVectorShiftToRVV(SDValue Op,
5897                                                       SelectionDAG &DAG) const {
5898   unsigned Opc;
5899   switch (Op.getOpcode()) {
5900   default: llvm_unreachable("Unexpected opcode!");
5901   case ISD::SHL: Opc = RISCVISD::SHL_VL; break;
5902   case ISD::SRA: Opc = RISCVISD::SRA_VL; break;
5903   case ISD::SRL: Opc = RISCVISD::SRL_VL; break;
5904   }
5905 
5906   return lowerToScalableOp(Op, DAG, Opc);
5907 }
5908 
5909 // Lower vector ABS to smax(X, sub(0, X)).
5910 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
5911   SDLoc DL(Op);
5912   MVT VT = Op.getSimpleValueType();
5913   SDValue X = Op.getOperand(0);
5914 
5915   assert(VT.isFixedLengthVector() && "Unexpected type");
5916 
5917   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5918   X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5919 
5920   SDValue Mask, VL;
5921   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5922 
5923   SDValue SplatZero = DAG.getNode(
5924       RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
5925       DAG.getConstant(0, DL, Subtarget.getXLenVT()));
5926   SDValue NegX =
5927       DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
5928   SDValue Max =
5929       DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
5930 
5931   return convertFromScalableVector(VT, Max, DAG, Subtarget);
5932 }
5933 
5934 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
5935     SDValue Op, SelectionDAG &DAG) const {
5936   SDLoc DL(Op);
5937   MVT VT = Op.getSimpleValueType();
5938   SDValue Mag = Op.getOperand(0);
5939   SDValue Sign = Op.getOperand(1);
5940   assert(Mag.getValueType() == Sign.getValueType() &&
5941          "Can only handle COPYSIGN with matching types.");
5942 
5943   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5944   Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
5945   Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
5946 
5947   SDValue Mask, VL;
5948   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5949 
5950   SDValue CopySign =
5951       DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);
5952 
5953   return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
5954 }
5955 
5956 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
5957     SDValue Op, SelectionDAG &DAG) const {
5958   MVT VT = Op.getSimpleValueType();
5959   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5960 
5961   MVT I1ContainerVT =
5962       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
5963 
5964   SDValue CC =
5965       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
5966   SDValue Op1 =
5967       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
5968   SDValue Op2 =
5969       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
5970 
5971   SDLoc DL(Op);
5972   SDValue Mask, VL;
5973   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5974 
5975   SDValue Select =
5976       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
5977 
5978   return convertFromScalableVector(VT, Select, DAG, Subtarget);
5979 }
5980 
5981 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
5982                                                unsigned NewOpc,
5983                                                bool HasMask) const {
5984   MVT VT = Op.getSimpleValueType();
5985   MVT ContainerVT = getContainerForFixedLengthVector(VT);
5986 
5987   // Create list of operands by converting existing ones to scalable types.
5988   SmallVector<SDValue, 6> Ops;
5989   for (const SDValue &V : Op->op_values()) {
5990     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
5991 
5992     // Pass through non-vector operands.
5993     if (!V.getValueType().isVector()) {
5994       Ops.push_back(V);
5995       continue;
5996     }
5997 
5998     // "cast" fixed length vector to a scalable vector.
5999     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
6000            "Only fixed length vectors are supported!");
6001     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
6002   }
6003 
6004   SDLoc DL(Op);
6005   SDValue Mask, VL;
6006   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6007   if (HasMask)
6008     Ops.push_back(Mask);
6009   Ops.push_back(VL);
6010 
6011   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
6012   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
6013 }
6014 
6015 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
6016 // * Operands of each node are assumed to be in the same order.
6017 // * The EVL operand is promoted from i32 to i64 on RV64.
6018 // * Fixed-length vectors are converted to their scalable-vector container
6019 //   types.
6020 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
6021                                        unsigned RISCVISDOpc) const {
6022   SDLoc DL(Op);
6023   MVT VT = Op.getSimpleValueType();
6024   SmallVector<SDValue, 4> Ops;
6025 
6026   for (const auto &OpIdx : enumerate(Op->ops())) {
6027     SDValue V = OpIdx.value();
6028     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
6029     // Pass through operands which aren't fixed-length vectors.
6030     if (!V.getValueType().isFixedLengthVector()) {
6031       Ops.push_back(V);
6032       continue;
6033     }
6034     // "cast" fixed length vector to a scalable vector.
6035     MVT OpVT = V.getSimpleValueType();
6036     MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
6037     assert(useRVVForFixedLengthVectorVT(OpVT) &&
6038            "Only fixed length vectors are supported!");
6039     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
6040   }
6041 
6042   if (!VT.isFixedLengthVector())
6043     return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
6044 
6045   MVT ContainerVT = getContainerForFixedLengthVector(VT);
6046 
6047   SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
6048 
6049   return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
6050 }
6051 
6052 SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
6053                                               SelectionDAG &DAG) const {
6054   SDLoc DL(Op);
6055   MVT VT = Op.getSimpleValueType();
6056 
6057   SDValue Src = Op.getOperand(0);
6058   // NOTE: Mask is dropped.
6059   SDValue VL = Op.getOperand(2);
6060 
6061   MVT ContainerVT = VT;
6062   if (VT.isFixedLengthVector()) {
6063     ContainerVT = getContainerForFixedLengthVector(VT);
6064     MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
6065     Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
6066   }
6067 
6068   MVT XLenVT = Subtarget.getXLenVT();
6069   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
6070   SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6071                                   DAG.getUNDEF(ContainerVT), Zero, VL);
6072 
6073   SDValue SplatValue = DAG.getConstant(
6074       Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
6075   SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6076                               DAG.getUNDEF(ContainerVT), SplatValue, VL);
6077 
6078   SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src,
6079                                Splat, ZeroSplat, VL);
6080   if (!VT.isFixedLengthVector())
6081     return Result;
6082   return convertFromScalableVector(VT, Result, DAG, Subtarget);
6083 }
6084 
SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
                                                SelectionDAG &DAG) const {
  // Lower a VP_SETCC whose operands are mask (i1) vectors using mask-register
  // logic: every comparison is rewritten as XOR/AND combinations with the
  // all-ones mask (see the per-case comments below). An i1 value is treated
  // as 1/true and 0/false; for signed comparisons true (-1) is the smaller
  // value.
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  SDValue Op1 = Op.getOperand(0);
  SDValue Op2 = Op.getOperand(1);
  ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  // NOTE: Mask is dropped. Masked-off lanes are undef, so computing all
  // lanes is acceptable.
  SDValue VL = Op.getOperand(4);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
  }

  SDValue Result;
  // All-ones mask used to complement a value: x ^ 1 == ~x for i1 lanes.
  SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);

  switch (Condition) {
  default:
    // Unhandled predicates fall through and return a null SDValue.
    break;
  // X != Y  --> (X^Y)
  case ISD::SETNE:
    Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
    break;
  // X == Y  --> ~(X^Y)
  case ISD::SETEQ: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
    Result =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
    break;
  }
  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
  // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
  case ISD::SETGT:
  case ISD::SETULT: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
    break;
  }
  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
  // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
  case ISD::SETLT:
  case ISD::SETUGT: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
    break;
  }
  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
  // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
  case ISD::SETGE:
  case ISD::SETULE: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
    break;
  }
  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
  // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
  case ISD::SETLE:
  case ISD::SETUGE: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
    break;
  }
  }

  // Convert back to the original fixed-length type if needed.
  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
6163 
6164 // Lower Floating-Point/Integer Type-Convert VP SDNodes
6165 SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
6166                                                 unsigned RISCVISDOpc) const {
6167   SDLoc DL(Op);
6168 
6169   SDValue Src = Op.getOperand(0);
6170   SDValue Mask = Op.getOperand(1);
6171   SDValue VL = Op.getOperand(2);
6172 
6173   MVT DstVT = Op.getSimpleValueType();
6174   MVT SrcVT = Src.getSimpleValueType();
6175   if (DstVT.isFixedLengthVector()) {
6176     DstVT = getContainerForFixedLengthVector(DstVT);
6177     SrcVT = getContainerForFixedLengthVector(SrcVT);
6178     Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
6179     MVT MaskVT = getMaskTypeFor(DstVT);
6180     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
6181   }
6182 
6183   unsigned RISCVISDExtOpc = (RISCVISDOpc == RISCVISD::SINT_TO_FP_VL ||
6184                              RISCVISDOpc == RISCVISD::FP_TO_SINT_VL)
6185                                 ? RISCVISD::VSEXT_VL
6186                                 : RISCVISD::VZEXT_VL;
6187 
6188   unsigned DstEltSize = DstVT.getScalarSizeInBits();
6189   unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
6190 
6191   SDValue Result;
6192   if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
6193     if (SrcVT.isInteger()) {
6194       assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
6195 
6196       // Do we need to do any pre-widening before converting?
6197       if (SrcEltSize == 1) {
6198         MVT IntVT = DstVT.changeVectorElementTypeToInteger();
6199         MVT XLenVT = Subtarget.getXLenVT();
6200         SDValue Zero = DAG.getConstant(0, DL, XLenVT);
6201         SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
6202                                         DAG.getUNDEF(IntVT), Zero, VL);
6203         SDValue One = DAG.getConstant(
6204             RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
6205         SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
6206                                        DAG.getUNDEF(IntVT), One, VL);
6207         Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat,
6208                           ZeroSplat, VL);
6209       } else if (DstEltSize > (2 * SrcEltSize)) {
6210         // Widen before converting.
6211         MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
6212                                      DstVT.getVectorElementCount());
6213         Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
6214       }
6215 
6216       Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
6217     } else {
6218       assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
6219              "Wrong input/output vector types");
6220 
6221       // Convert f16 to f32 then convert f32 to i64.
6222       if (DstEltSize > (2 * SrcEltSize)) {
6223         assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
6224         MVT InterimFVT =
6225             MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
6226         Src =
6227             DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
6228       }
6229 
6230       Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
6231     }
6232   } else { // Narrowing + Conversion
6233     if (SrcVT.isInteger()) {
6234       assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
6235       // First do a narrowing convert to an FP type half the size, then round
6236       // the FP type to a small FP type if needed.
6237 
6238       MVT InterimFVT = DstVT;
6239       if (SrcEltSize > (2 * DstEltSize)) {
6240         assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
6241         assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
6242         InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
6243       }
6244 
6245       Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
6246 
6247       if (InterimFVT != DstVT) {
6248         Src = Result;
6249         Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
6250       }
6251     } else {
6252       assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
6253              "Wrong input/output vector types");
6254       // First do a narrowing conversion to an integer half the size, then
6255       // truncate if needed.
6256 
6257       if (DstEltSize == 1) {
6258         // First convert to the same size integer, then convert to mask using
6259         // setcc.
6260         assert(SrcEltSize >= 16 && "Unexpected FP type!");
6261         MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
6262                                           DstVT.getVectorElementCount());
6263         Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
6264 
6265         // Compare the integer result to 0. The integer should be 0 or 1/-1,
6266         // otherwise the conversion was undefined.
6267         MVT XLenVT = Subtarget.getXLenVT();
6268         SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6269         SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
6270                                 DAG.getUNDEF(InterimIVT), SplatZero);
6271         Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, Result, SplatZero,
6272                              DAG.getCondCode(ISD::SETNE), Mask, VL);
6273       } else {
6274         MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6275                                           DstVT.getVectorElementCount());
6276 
6277         Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
6278 
6279         while (InterimIVT != DstVT) {
6280           SrcEltSize /= 2;
6281           Src = Result;
6282           InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6283                                         DstVT.getVectorElementCount());
6284           Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
6285                                Src, Mask, VL);
6286         }
6287       }
6288     }
6289   }
6290 
6291   MVT VT = Op.getSimpleValueType();
6292   if (!VT.isFixedLengthVector())
6293     return Result;
6294   return convertFromScalableVector(VT, Result, DAG, Subtarget);
6295 }
6296 
6297 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
6298                                             unsigned MaskOpc,
6299                                             unsigned VecOpc) const {
6300   MVT VT = Op.getSimpleValueType();
6301   if (VT.getVectorElementType() != MVT::i1)
6302     return lowerVPOp(Op, DAG, VecOpc);
6303 
6304   // It is safe to drop mask parameter as masked-off elements are undef.
6305   SDValue Op1 = Op->getOperand(0);
6306   SDValue Op2 = Op->getOperand(1);
6307   SDValue VL = Op->getOperand(3);
6308 
6309   MVT ContainerVT = VT;
6310   const bool IsFixed = VT.isFixedLengthVector();
6311   if (IsFixed) {
6312     ContainerVT = getContainerForFixedLengthVector(VT);
6313     Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
6314     Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
6315   }
6316 
6317   SDLoc DL(Op);
6318   SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL);
6319   if (!IsFixed)
6320     return Val;
6321   return convertFromScalableVector(VT, Val, DAG, Subtarget);
6322 }
6323 
6324 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
6325 // matched to a RVV indexed load. The RVV indexed load instructions only
6326 // support the "unsigned unscaled" addressing mode; indices are implicitly
6327 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
6328 // signed or scaled indexing is extended to the XLEN value type and scaled
6329 // accordingly.
SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  // Common memory-operand state shared by the MGATHER and VP_GATHER flavours.
  const auto *MemSD = cast<MemSDNode>(Op.getNode());
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  ISD::LoadExtType LoadExtType;
  SDValue Index, Mask, PassThru, VL;

  // Normalize the two node flavours into the locals above. Only VP_GATHER
  // carries an explicit VL; for MGATHER the default VL is computed below.
  if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
    Index = VPGN->getIndex();
    Mask = VPGN->getMask();
    PassThru = DAG.getUNDEF(VT);
    VL = VPGN->getVectorLength();
    // VP doesn't support extending loads.
    LoadExtType = ISD::NON_EXTLOAD;
  } else {
    // Else it must be a MGATHER.
    auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
    Index = MGN->getIndex();
    Mask = MGN->getMask();
    PassThru = MGN->getPassThru();
    LoadExtType = MGN->getExtensionType();
  }

  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads.
  assert(LoadExtType == ISD::NON_EXTLOAD &&
         "Unexpected extending MGATHER/VP_GATHER");
  (void)LoadExtType;

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  // Fixed-length vectors are operated on in an equivalent scalable container
  // type; convert all vector operands accordingly.
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
                               ContainerVT.getVectorElementCount());

    Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);

    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
      PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    }
  }

  // MGATHER has no VL operand; use the default VL for the (container) type.
  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  // On RV32, truncate an i64-element index vector to XLEN. This is safe
  // because the indexed-load instruction treats indices as zero-extended /
  // truncated to XLEN anyway (see the function-level comment).
  if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
    IndexVT = IndexVT.changeVectorElementType(XLenVT);
    SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
                                   VL);
    Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
                        TrueMask, VL);
  }

  // Assemble the vluxei[_mask] intrinsic operands:
  //   chain, ID, passthru/undef, base, index[, mask], vl[, policy].
  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (IsUnmasked)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  else
    Ops.push_back(PassThru);
  Ops.push_back(BasePtr);
  Ops.push_back(Index);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);
  if (!IsUnmasked)
    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
  Chain = Result.getValue(1);

  // Convert the result back to the original fixed-length type if needed.
  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
6426 
6427 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
6428 // matched to a RVV indexed store. The RVV indexed store instructions only
6429 // support the "unsigned unscaled" addressing mode; indices are implicitly
6430 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
6431 // signed or scaled indexing is extended to the XLEN value type and scaled
6432 // accordingly.
SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // Common memory-operand state shared by the MSCATTER and VP_SCATTER
  // flavours.
  const auto *MemSD = cast<MemSDNode>(Op.getNode());
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  bool IsTruncatingStore = false;
  SDValue Index, Mask, Val, VL;

  // Normalize the two node flavours into the locals above. Only VP_SCATTER
  // carries an explicit VL; for MSCATTER the default VL is computed below.
  if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
    Index = VPSN->getIndex();
    Mask = VPSN->getMask();
    Val = VPSN->getValue();
    VL = VPSN->getVectorLength();
    // VP doesn't support truncating stores.
    IsTruncatingStore = false;
  } else {
    // Else it must be a MSCATTER.
    auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
    Index = MSN->getIndex();
    Mask = MSN->getMask();
    Val = MSN->getValue();
    IsTruncatingStore = MSN->isTruncatingStore();
  }

  MVT VT = Val.getSimpleValueType();
  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads and
  // truncating vector stores.
  assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
  (void)IsTruncatingStore;

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  // Fixed-length vectors are operated on in an equivalent scalable container
  // type; convert all vector operands accordingly.
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
                               ContainerVT.getVectorElementCount());

    Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);

    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  // MSCATTER has no VL operand; use the default VL for the (container) type.
  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  // On RV32, truncate an i64-element index vector to XLEN. This is safe
  // because the indexed-store instruction treats indices as zero-extended /
  // truncated to XLEN anyway (see the function-level comment).
  if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
    IndexVT = IndexVT.changeVectorElementType(XLenVT);
    SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
                                   VL);
    Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
                        TrueMask, VL);
  }

  // Assemble the vsoxei[_mask] intrinsic operands:
  //   chain, ID, value, base, index[, mask], vl.
  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  Ops.push_back(Val);
  Ops.push_back(BasePtr);
  Ops.push_back(Index);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
6516 
6517 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
6518                                                SelectionDAG &DAG) const {
6519   const MVT XLenVT = Subtarget.getXLenVT();
6520   SDLoc DL(Op);
6521   SDValue Chain = Op->getOperand(0);
6522   SDValue SysRegNo = DAG.getTargetConstant(
6523       RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
6524   SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
6525   SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
6526 
6527   // Encoding used for rounding mode in RISCV differs from that used in
6528   // FLT_ROUNDS. To convert it the RISCV rounding mode is used as an index in a
6529   // table, which consists of a sequence of 4-bit fields, each representing
6530   // corresponding FLT_ROUNDS mode.
6531   static const int Table =
6532       (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
6533       (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
6534       (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
6535       (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
6536       (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
6537 
6538   SDValue Shift =
6539       DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
6540   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
6541                                 DAG.getConstant(Table, DL, XLenVT), Shift);
6542   SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
6543                                DAG.getConstant(7, DL, XLenVT));
6544 
6545   return DAG.getMergeValues({Masked, Chain}, DL);
6546 }
6547 
6548 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
6549                                                SelectionDAG &DAG) const {
6550   const MVT XLenVT = Subtarget.getXLenVT();
6551   SDLoc DL(Op);
6552   SDValue Chain = Op->getOperand(0);
6553   SDValue RMValue = Op->getOperand(1);
6554   SDValue SysRegNo = DAG.getTargetConstant(
6555       RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
6556 
6557   // Encoding used for rounding mode in RISCV differs from that used in
6558   // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
6559   // a table, which consists of a sequence of 4-bit fields, each representing
6560   // corresponding RISCV mode.
6561   static const unsigned Table =
6562       (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
6563       (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
6564       (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
6565       (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
6566       (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
6567 
6568   SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
6569                               DAG.getConstant(2, DL, XLenVT));
6570   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
6571                                 DAG.getConstant(Table, DL, XLenVT), Shift);
6572   RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
6573                         DAG.getConstant(0x7, DL, XLenVT));
6574   return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
6575                      RMValue);
6576 }
6577 
6578 static RISCVISD::NodeType getRISCVWOpcodeByIntr(unsigned IntNo) {
6579   switch (IntNo) {
6580   default:
6581     llvm_unreachable("Unexpected Intrinsic");
6582   case Intrinsic::riscv_bcompress:
6583     return RISCVISD::BCOMPRESSW;
6584   case Intrinsic::riscv_bdecompress:
6585     return RISCVISD::BDECOMPRESSW;
6586   case Intrinsic::riscv_bfp:
6587     return RISCVISD::BFPW;
6588   case Intrinsic::riscv_fsl:
6589     return RISCVISD::FSLW;
6590   case Intrinsic::riscv_fsr:
6591     return RISCVISD::FSRW;
6592   }
6593 }
6594 
6595 // Converts the given intrinsic to a i64 operation with any extension.
6596 static SDValue customLegalizeToWOpByIntr(SDNode *N, SelectionDAG &DAG,
6597                                          unsigned IntNo) {
6598   SDLoc DL(N);
6599   RISCVISD::NodeType WOpcode = getRISCVWOpcodeByIntr(IntNo);
6600   // Deal with the Instruction Operands
6601   SmallVector<SDValue, 3> NewOps;
6602   for (SDValue Op : drop_begin(N->ops()))
6603     // Promote the operand to i64 type
6604     NewOps.push_back(DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op));
6605   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOps);
6606   // ReplaceNodeResults requires we maintain the same type for the return value.
6607   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
6608 }
6609 
6610 // Returns the opcode of the target-specific SDNode that implements the 32-bit
6611 // form of the given Opcode.
6612 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
6613   switch (Opcode) {
6614   default:
6615     llvm_unreachable("Unexpected opcode");
6616   case ISD::SHL:
6617     return RISCVISD::SLLW;
6618   case ISD::SRA:
6619     return RISCVISD::SRAW;
6620   case ISD::SRL:
6621     return RISCVISD::SRLW;
6622   case ISD::SDIV:
6623     return RISCVISD::DIVW;
6624   case ISD::UDIV:
6625     return RISCVISD::DIVUW;
6626   case ISD::UREM:
6627     return RISCVISD::REMUW;
6628   case ISD::ROTL:
6629     return RISCVISD::ROLW;
6630   case ISD::ROTR:
6631     return RISCVISD::RORW;
6632   }
6633 }
6634 
6635 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
6636 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
6637 // otherwise be promoted to i64, making it difficult to select the
6638 // SLLW/DIVUW/.../*W later one because the fact the operation was originally of
6639 // type i8/i16/i32 is lost.
6640 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
6641                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
6642   SDLoc DL(N);
6643   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
6644   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
6645   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
6646   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
6647   // ReplaceNodeResults requires we maintain the same type for the return value.
6648   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
6649 }
6650 
6651 // Converts the given 32-bit operation to a i64 operation with signed extension
6652 // semantic to reduce the signed extension instructions.
6653 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
6654   SDLoc DL(N);
6655   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6656   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6657   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
6658   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
6659                                DAG.getValueType(MVT::i32));
6660   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
6661 }
6662 
6663 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
6664                                              SmallVectorImpl<SDValue> &Results,
6665                                              SelectionDAG &DAG) const {
6666   SDLoc DL(N);
6667   switch (N->getOpcode()) {
6668   default:
6669     llvm_unreachable("Don't know how to custom type legalize this operation!");
6670   case ISD::STRICT_FP_TO_SINT:
6671   case ISD::STRICT_FP_TO_UINT:
6672   case ISD::FP_TO_SINT:
6673   case ISD::FP_TO_UINT: {
6674     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6675            "Unexpected custom legalisation");
6676     bool IsStrict = N->isStrictFPOpcode();
6677     bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
6678                     N->getOpcode() == ISD::STRICT_FP_TO_SINT;
6679     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
6680     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
6681         TargetLowering::TypeSoftenFloat) {
6682       if (!isTypeLegal(Op0.getValueType()))
6683         return;
6684       if (IsStrict) {
6685         unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
6686                                 : RISCVISD::STRICT_FCVT_WU_RV64;
6687         SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
6688         SDValue Res = DAG.getNode(
6689             Opc, DL, VTs, N->getOperand(0), Op0,
6690             DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
6691         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6692         Results.push_back(Res.getValue(1));
6693         return;
6694       }
6695       unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
6696       SDValue Res =
6697           DAG.getNode(Opc, DL, MVT::i64, Op0,
6698                       DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
6699       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6700       return;
6701     }
6702     // If the FP type needs to be softened, emit a library call using the 'si'
6703     // version. If we left it to default legalization we'd end up with 'di'. If
6704     // the FP type doesn't need to be softened just let generic type
6705     // legalization promote the result type.
6706     RTLIB::Libcall LC;
6707     if (IsSigned)
6708       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
6709     else
6710       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
6711     MakeLibCallOptions CallOptions;
6712     EVT OpVT = Op0.getValueType();
6713     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
6714     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
6715     SDValue Result;
6716     std::tie(Result, Chain) =
6717         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
6718     Results.push_back(Result);
6719     if (IsStrict)
6720       Results.push_back(Chain);
6721     break;
6722   }
6723   case ISD::READCYCLECOUNTER: {
6724     assert(!Subtarget.is64Bit() &&
6725            "READCYCLECOUNTER only has custom type legalization on riscv32");
6726 
6727     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
6728     SDValue RCW =
6729         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
6730 
6731     Results.push_back(
6732         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
6733     Results.push_back(RCW.getValue(2));
6734     break;
6735   }
6736   case ISD::MUL: {
6737     unsigned Size = N->getSimpleValueType(0).getSizeInBits();
6738     unsigned XLen = Subtarget.getXLen();
6739     // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
6740     if (Size > XLen) {
6741       assert(Size == (XLen * 2) && "Unexpected custom legalisation");
6742       SDValue LHS = N->getOperand(0);
6743       SDValue RHS = N->getOperand(1);
6744       APInt HighMask = APInt::getHighBitsSet(Size, XLen);
6745 
6746       bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
6747       bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
6748       // We need exactly one side to be unsigned.
6749       if (LHSIsU == RHSIsU)
6750         return;
6751 
6752       auto MakeMULPair = [&](SDValue S, SDValue U) {
6753         MVT XLenVT = Subtarget.getXLenVT();
6754         S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
6755         U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
6756         SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
6757         SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
6758         return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
6759       };
6760 
6761       bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
6762       bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
6763 
6764       // The other operand should be signed, but still prefer MULH when
6765       // possible.
6766       if (RHSIsU && LHSIsS && !RHSIsS)
6767         Results.push_back(MakeMULPair(LHS, RHS));
6768       else if (LHSIsU && RHSIsS && !LHSIsS)
6769         Results.push_back(MakeMULPair(RHS, LHS));
6770 
6771       return;
6772     }
6773     LLVM_FALLTHROUGH;
6774   }
6775   case ISD::ADD:
6776   case ISD::SUB:
6777     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6778            "Unexpected custom legalisation");
6779     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
6780     break;
6781   case ISD::SHL:
6782   case ISD::SRA:
6783   case ISD::SRL:
6784     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6785            "Unexpected custom legalisation");
6786     if (N->getOperand(1).getOpcode() != ISD::Constant) {
6787       // If we can use a BSET instruction, allow default promotion to apply.
6788       if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
6789           isOneConstant(N->getOperand(0)))
6790         break;
6791       Results.push_back(customLegalizeToWOp(N, DAG));
6792       break;
6793     }
6794 
6795     // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
6796     // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
6797     // shift amount.
6798     if (N->getOpcode() == ISD::SHL) {
6799       SDLoc DL(N);
6800       SDValue NewOp0 =
6801           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6802       SDValue NewOp1 =
6803           DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
6804       SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
6805       SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
6806                                    DAG.getValueType(MVT::i32));
6807       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
6808     }
6809 
6810     break;
6811   case ISD::ROTL:
6812   case ISD::ROTR:
6813     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6814            "Unexpected custom legalisation");
6815     Results.push_back(customLegalizeToWOp(N, DAG));
6816     break;
6817   case ISD::CTTZ:
6818   case ISD::CTTZ_ZERO_UNDEF:
6819   case ISD::CTLZ:
6820   case ISD::CTLZ_ZERO_UNDEF: {
6821     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6822            "Unexpected custom legalisation");
6823 
6824     SDValue NewOp0 =
6825         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6826     bool IsCTZ =
6827         N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
6828     unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
6829     SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
6830     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6831     return;
6832   }
6833   case ISD::SDIV:
6834   case ISD::UDIV:
6835   case ISD::UREM: {
6836     MVT VT = N->getSimpleValueType(0);
6837     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
6838            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
6839            "Unexpected custom legalisation");
6840     // Don't promote division/remainder by constant since we should expand those
6841     // to multiply by magic constant.
6842     // FIXME: What if the expansion is disabled for minsize.
6843     if (N->getOperand(1).getOpcode() == ISD::Constant)
6844       return;
6845 
6846     // If the input is i32, use ANY_EXTEND since the W instructions don't read
6847     // the upper 32 bits. For other types we need to sign or zero extend
6848     // based on the opcode.
6849     unsigned ExtOpc = ISD::ANY_EXTEND;
6850     if (VT != MVT::i32)
6851       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
6852                                            : ISD::ZERO_EXTEND;
6853 
6854     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
6855     break;
6856   }
6857   case ISD::UADDO:
6858   case ISD::USUBO: {
6859     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6860            "Unexpected custom legalisation");
6861     bool IsAdd = N->getOpcode() == ISD::UADDO;
6862     // Create an ADDW or SUBW.
6863     SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6864     SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6865     SDValue Res =
6866         DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
6867     Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
6868                       DAG.getValueType(MVT::i32));
6869 
6870     SDValue Overflow;
6871     if (IsAdd && isOneConstant(RHS)) {
6872       // Special case uaddo X, 1 overflowed if the addition result is 0.
6873       // The general case (X + C) < C is not necessarily beneficial. Although we
6874       // reduce the live range of X, we may introduce the materialization of
6875       // constant C, especially when the setcc result is used by branch. We have
6876       // no compare with constant and branch instructions.
6877       Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
6878                               DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
6879     } else {
6880       // Sign extend the LHS and perform an unsigned compare with the ADDW
6881       // result. Since the inputs are sign extended from i32, this is equivalent
6882       // to comparing the lower 32 bits.
6883       LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
6884       Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
6885                               IsAdd ? ISD::SETULT : ISD::SETUGT);
6886     }
6887 
6888     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6889     Results.push_back(Overflow);
6890     return;
6891   }
6892   case ISD::UADDSAT:
6893   case ISD::USUBSAT: {
6894     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6895            "Unexpected custom legalisation");
6896     if (Subtarget.hasStdExtZbb()) {
6897       // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
6898       // sign extend allows overflow of the lower 32 bits to be detected on
6899       // the promoted size.
6900       SDValue LHS =
6901           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
6902       SDValue RHS =
6903           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
6904       SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
6905       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
6906       return;
6907     }
6908 
6909     // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
6910     // promotion for UADDO/USUBO.
6911     Results.push_back(expandAddSubSat(N, DAG));
6912     return;
6913   }
6914   case ISD::ABS: {
6915     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
6916            "Unexpected custom legalisation");
6917           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
6918 
6919     // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
6920 
6921     SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6922 
6923     // Freeze the source so we can increase it's use count.
6924     Src = DAG.getFreeze(Src);
6925 
6926     // Copy sign bit to all bits using the sraiw pattern.
6927     SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
6928                                    DAG.getValueType(MVT::i32));
6929     SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
6930                            DAG.getConstant(31, DL, MVT::i64));
6931 
6932     SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
6933     NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
6934 
6935     // NOTE: The result is only required to be anyextended, but sext is
6936     // consistent with type legalization of sub.
6937     NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
6938                          DAG.getValueType(MVT::i32));
6939     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
6940     return;
6941   }
6942   case ISD::BITCAST: {
6943     EVT VT = N->getValueType(0);
6944     assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
6945     SDValue Op0 = N->getOperand(0);
6946     EVT Op0VT = Op0.getValueType();
6947     MVT XLenVT = Subtarget.getXLenVT();
6948     if (VT == MVT::i16 && Op0VT == MVT::f16 && Subtarget.hasStdExtZfh()) {
6949       SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
6950       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
6951     } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
6952                Subtarget.hasStdExtF()) {
6953       SDValue FPConv =
6954           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
6955       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
6956     } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
6957                isTypeLegal(Op0VT)) {
6958       // Custom-legalize bitcasts from fixed-length vector types to illegal
6959       // scalar types in order to improve codegen. Bitcast the vector to a
6960       // one-element vector type whose element type is the same as the result
6961       // type, and extract the first element.
6962       EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6963       if (isTypeLegal(BVT)) {
6964         SDValue BVec = DAG.getBitcast(BVT, Op0);
6965         Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6966                                       DAG.getConstant(0, DL, XLenVT)));
6967       }
6968     }
6969     break;
6970   }
6971   case RISCVISD::GREV:
6972   case RISCVISD::GORC:
6973   case RISCVISD::SHFL: {
6974     MVT VT = N->getSimpleValueType(0);
6975     MVT XLenVT = Subtarget.getXLenVT();
6976     assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
6977            "Unexpected custom legalisation");
6978     assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
6979     assert((Subtarget.hasStdExtZbp() ||
6980             (Subtarget.hasStdExtZbkb() && N->getOpcode() == RISCVISD::GREV &&
6981              N->getConstantOperandVal(1) == 7)) &&
6982            "Unexpected extension");
6983     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
6984     SDValue NewOp1 =
6985         DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
6986     SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp0, NewOp1);
6987     // ReplaceNodeResults requires we maintain the same type for the return
6988     // value.
6989     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
6990     break;
6991   }
6992   case ISD::BSWAP:
6993   case ISD::BITREVERSE: {
6994     MVT VT = N->getSimpleValueType(0);
6995     MVT XLenVT = Subtarget.getXLenVT();
6996     assert((VT == MVT::i8 || VT == MVT::i16 ||
6997             (VT == MVT::i32 && Subtarget.is64Bit())) &&
6998            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
6999     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
7000     unsigned Imm = VT.getSizeInBits() - 1;
7001     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
7002     if (N->getOpcode() == ISD::BSWAP)
7003       Imm &= ~0x7U;
7004     SDValue GREVI = DAG.getNode(RISCVISD::GREV, DL, XLenVT, NewOp0,
7005                                 DAG.getConstant(Imm, DL, XLenVT));
7006     // ReplaceNodeResults requires we maintain the same type for the return
7007     // value.
7008     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, GREVI));
7009     break;
7010   }
7011   case ISD::FSHL:
7012   case ISD::FSHR: {
7013     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7014            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
7015     SDValue NewOp0 =
7016         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
7017     SDValue NewOp1 =
7018         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
7019     SDValue NewShAmt =
7020         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
7021     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
7022     // Mask the shift amount to 5 bits to prevent accidentally setting bit 5.
7023     NewShAmt = DAG.getNode(ISD::AND, DL, MVT::i64, NewShAmt,
7024                            DAG.getConstant(0x1f, DL, MVT::i64));
7025     // fshl and fshr concatenate their operands in the same order. fsrw and fslw
7026     // instruction use different orders. fshl will return its first operand for
7027     // shift of zero, fshr will return its second operand. fsl and fsr both
7028     // return rs1 so the ISD nodes need to have different operand orders.
7029     // Shift amount is in rs2.
7030     unsigned Opc = RISCVISD::FSLW;
7031     if (N->getOpcode() == ISD::FSHR) {
7032       std::swap(NewOp0, NewOp1);
7033       Opc = RISCVISD::FSRW;
7034     }
7035     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewShAmt);
7036     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
7037     break;
7038   }
7039   case ISD::EXTRACT_VECTOR_ELT: {
7040     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
7041     // type is illegal (currently only vXi64 RV32).
7042     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
7043     // transferred to the destination register. We issue two of these from the
7044     // upper- and lower- halves of the SEW-bit vector element, slid down to the
7045     // first element.
7046     SDValue Vec = N->getOperand(0);
7047     SDValue Idx = N->getOperand(1);
7048 
7049     // The vector type hasn't been legalized yet so we can't issue target
7050     // specific nodes if it needs legalization.
7051     // FIXME: We would manually legalize if it's important.
7052     if (!isTypeLegal(Vec.getValueType()))
7053       return;
7054 
7055     MVT VecVT = Vec.getSimpleValueType();
7056 
7057     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
7058            VecVT.getVectorElementType() == MVT::i64 &&
7059            "Unexpected EXTRACT_VECTOR_ELT legalization");
7060 
7061     // If this is a fixed vector, we need to convert it to a scalable vector.
7062     MVT ContainerVT = VecVT;
7063     if (VecVT.isFixedLengthVector()) {
7064       ContainerVT = getContainerForFixedLengthVector(VecVT);
7065       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7066     }
7067 
7068     MVT XLenVT = Subtarget.getXLenVT();
7069 
7070     // Use a VL of 1 to avoid processing more elements than we need.
7071     SDValue VL = DAG.getConstant(1, DL, XLenVT);
7072     SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
7073 
7074     // Unless the index is known to be 0, we must slide the vector down to get
7075     // the desired element into index 0.
7076     if (!isNullConstant(Idx)) {
7077       Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
7078                         DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
7079     }
7080 
7081     // Extract the lower XLEN bits of the correct vector element.
7082     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
7083 
7084     // To extract the upper XLEN bits of the vector element, shift the first
7085     // element right by 32 bits and re-extract the lower XLEN bits.
7086     SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7087                                      DAG.getUNDEF(ContainerVT),
7088                                      DAG.getConstant(32, DL, XLenVT), VL);
7089     SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
7090                                  ThirtyTwoV, Mask, VL);
7091 
7092     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
7093 
7094     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
7095     break;
7096   }
7097   case ISD::INTRINSIC_WO_CHAIN: {
7098     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
7099     switch (IntNo) {
7100     default:
7101       llvm_unreachable(
7102           "Don't know how to custom type legalize this intrinsic!");
7103     case Intrinsic::riscv_grev:
7104     case Intrinsic::riscv_gorc: {
7105       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7106              "Unexpected custom legalisation");
7107       SDValue NewOp1 =
7108           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
7109       SDValue NewOp2 =
7110           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
7111       unsigned Opc =
7112           IntNo == Intrinsic::riscv_grev ? RISCVISD::GREVW : RISCVISD::GORCW;
7113       // If the control is a constant, promote the node by clearing any extra
7114       // bits bits in the control. isel will form greviw/gorciw if the result is
7115       // sign extended.
7116       if (isa<ConstantSDNode>(NewOp2)) {
7117         NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
7118                              DAG.getConstant(0x1f, DL, MVT::i64));
7119         Opc = IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
7120       }
7121       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
7122       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
7123       break;
7124     }
7125     case Intrinsic::riscv_bcompress:
7126     case Intrinsic::riscv_bdecompress:
7127     case Intrinsic::riscv_bfp:
7128     case Intrinsic::riscv_fsl:
7129     case Intrinsic::riscv_fsr: {
7130       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7131              "Unexpected custom legalisation");
7132       Results.push_back(customLegalizeToWOpByIntr(N, DAG, IntNo));
7133       break;
7134     }
7135     case Intrinsic::riscv_orc_b: {
7136       // Lower to the GORCI encoding for orc.b with the operand extended.
7137       SDValue NewOp =
7138           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
7139       SDValue Res = DAG.getNode(RISCVISD::GORC, DL, MVT::i64, NewOp,
7140                                 DAG.getConstant(7, DL, MVT::i64));
7141       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
7142       return;
7143     }
7144     case Intrinsic::riscv_shfl:
7145     case Intrinsic::riscv_unshfl: {
7146       assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7147              "Unexpected custom legalisation");
7148       SDValue NewOp1 =
7149           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
7150       SDValue NewOp2 =
7151           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
7152       unsigned Opc =
7153           IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW;
7154       // There is no (UN)SHFLIW. If the control word is a constant, we can use
7155       // (UN)SHFLI with bit 4 of the control word cleared. The upper 32 bit half
7156       // will be shuffled the same way as the lower 32 bit half, but the two
7157       // halves won't cross.
7158       if (isa<ConstantSDNode>(NewOp2)) {
7159         NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
7160                              DAG.getConstant(0xf, DL, MVT::i64));
7161         Opc =
7162             IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
7163       }
7164       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
7165       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
7166       break;
7167     }
7168     case Intrinsic::riscv_vmv_x_s: {
7169       EVT VT = N->getValueType(0);
7170       MVT XLenVT = Subtarget.getXLenVT();
7171       if (VT.bitsLT(XLenVT)) {
7172         // Simple case just extract using vmv.x.s and truncate.
7173         SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
7174                                       Subtarget.getXLenVT(), N->getOperand(1));
7175         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
7176         return;
7177       }
7178 
7179       assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
7180              "Unexpected custom legalization");
7181 
7182       // We need to do the move in two steps.
7183       SDValue Vec = N->getOperand(1);
7184       MVT VecVT = Vec.getSimpleValueType();
7185 
7186       // First extract the lower XLEN bits of the element.
7187       SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
7188 
7189       // To extract the upper XLEN bits of the vector element, shift the first
7190       // element right by 32 bits and re-extract the lower XLEN bits.
7191       SDValue VL = DAG.getConstant(1, DL, XLenVT);
7192       SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
7193 
7194       SDValue ThirtyTwoV =
7195           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
7196                       DAG.getConstant(32, DL, XLenVT), VL);
7197       SDValue LShr32 =
7198           DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
7199       SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
7200 
7201       Results.push_back(
7202           DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
7203       break;
7204     }
7205     }
7206     break;
7207   }
7208   case ISD::VECREDUCE_ADD:
7209   case ISD::VECREDUCE_AND:
7210   case ISD::VECREDUCE_OR:
7211   case ISD::VECREDUCE_XOR:
7212   case ISD::VECREDUCE_SMAX:
7213   case ISD::VECREDUCE_UMAX:
7214   case ISD::VECREDUCE_SMIN:
7215   case ISD::VECREDUCE_UMIN:
7216     if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
7217       Results.push_back(V);
7218     break;
7219   case ISD::VP_REDUCE_ADD:
7220   case ISD::VP_REDUCE_AND:
7221   case ISD::VP_REDUCE_OR:
7222   case ISD::VP_REDUCE_XOR:
7223   case ISD::VP_REDUCE_SMAX:
7224   case ISD::VP_REDUCE_UMAX:
7225   case ISD::VP_REDUCE_SMIN:
7226   case ISD::VP_REDUCE_UMIN:
7227     if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
7228       Results.push_back(V);
7229     break;
7230   case ISD::FLT_ROUNDS_: {
7231     SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
7232     SDValue Res = DAG.getNode(ISD::FLT_ROUNDS_, DL, VTs, N->getOperand(0));
7233     Results.push_back(Res.getValue(0));
7234     Results.push_back(Res.getValue(1));
7235     break;
7236   }
7237   }
7238 }
7239 
// A structure to hold one of the bit-manipulation patterns below. Together, a
// SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
//   (or (and (shl x, 1), 0xAAAAAAAA),
//       (and (srl x, 1), 0x55555555))
struct RISCVBitmanipPat {
  // The source value being shifted and masked.
  SDValue Op;
  // The constant (power-of-2) shift amount used by the matched pattern.
  unsigned ShAmt;
  // True if the matched shift was SHL, false if it was SRL.
  bool IsSHL;

  // Two patterns pair up when they shift the same source by the same amount
  // in opposite directions — e.g. the SHL and SRL halves of a GREVI stage.
  bool formsPairWith(const RISCVBitmanipPat &Other) const {
    return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
  }
};
7253 
// Matches patterns of the form
//   (and (shl x, C2), (C1 << C2))
//   (and (srl x, C2), C1)
//   (shl (and x, C1), C2)
//   (srl (and x, (C1 << C2)), C2)
// Where C2 is a power of 2 and C1 has at least that many leading zeroes.
// The expected masks for each shift amount are specified in BitmanipMasks where
// BitmanipMasks[log2(C2)] specifies the expected C1 value.
// The max allowed shift amount is either XLen/2 or XLen/4 determined by whether
// BitmanipMasks contains 6 or 5 entries assuming that the maximum possible
// XLen is 64.
// Returns None on failure; on success returns the shifted source value, the
// shift amount, and the shift direction.
static Optional<RISCVBitmanipPat>
matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
  assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
         "Unexpected number of masks");
  Optional<uint64_t> Mask;
  // Optionally consume a mask around the shift operation.
  if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
    Mask = Op.getConstantOperandVal(1);
    Op = Op.getOperand(0);
  }
  // The core of every pattern is a constant-amount SHL or SRL.
  if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
    return None;
  bool IsSHL = Op.getOpcode() == ISD::SHL;

  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return None;
  uint64_t ShAmt = Op.getConstantOperandVal(1);

  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  // The shift amount must be a power of 2 smaller than the type width so it
  // can index into BitmanipMasks below.
  if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
    return None;
  // If we don't have enough masks for 64 bit, then we must be trying to
  // match SHFL so we're only allowed to shift 1/4 of the width.
  if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
    return None;

  SDValue Src = Op.getOperand(0);

  // The expected mask is shifted left when the AND is found around SHL
  // patterns.
  //   ((x >> 1) & 0x55555555)
  //   ((x << 1) & 0xAAAAAAAA)
  bool SHLExpMask = IsSHL;

  if (!Mask) {
    // Sometimes LLVM keeps the mask as an operand of the shift, typically when
    // the mask is all ones: consume that now.
    if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
      Mask = Src.getConstantOperandVal(1);
      Src = Src.getOperand(0);
      // The expected mask is now in fact shifted left for SRL, so reverse the
      // decision.
      //   ((x & 0xAAAAAAAA) >> 1)
      //   ((x & 0x55555555) << 1)
      SHLExpMask = !SHLExpMask;
    } else {
      // Use a default shifted mask of all-ones if there's no AND, truncated
      // down to the expected width. This simplifies the logic later on.
      Mask = maskTrailingOnes<uint64_t>(Width);
      *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
    }
  }

  // Look up the mask we expect for this shift amount, truncated to the
  // operation's width, and pre-shift it if the AND sat outside the shift.
  unsigned MaskIdx = Log2_32(ShAmt);
  uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);

  if (SHLExpMask)
    ExpMask <<= ShAmt;

  if (Mask != ExpMask)
    return None;

  return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
}
7329 
7330 // Matches any of the following bit-manipulation patterns:
7331 //   (and (shl x, 1), (0x55555555 << 1))
7332 //   (and (srl x, 1), 0x55555555)
7333 //   (shl (and x, 0x55555555), 1)
7334 //   (srl (and x, (0x55555555 << 1)), 1)
7335 // where the shift amount and mask may vary thus:
7336 //   [1]  = 0x55555555 / 0xAAAAAAAA
7337 //   [2]  = 0x33333333 / 0xCCCCCCCC
7338 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
7339 //   [8]  = 0x00FF00FF / 0xFF00FF00
7340 //   [16] = 0x0000FFFF / 0xFFFFFFFF
7341 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
7342 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
7343   // These are the unshifted masks which we use to match bit-manipulation
7344   // patterns. They may be shifted left in certain circumstances.
7345   static const uint64_t BitmanipMasks[] = {
7346       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
7347       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
7348 
7349   return matchRISCVBitmanipPat(Op, BitmanipMasks);
7350 }
7351 
// Try to fold (<bop> x, (reduction.<bop> vec, start)) into
// (extract_elt (reduction.<bop> vec, (<bop> x, start)), 0), i.e. fold the
// scalar binop into the reduction's start value so the scalar op disappears.
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG) {
  // Map a scalar binary opcode to its RISCVISD VL-reduction counterpart.
  auto BinOpToRVVReduce = [](unsigned Opc) {
    switch (Opc) {
    default:
      llvm_unreachable("Unhandled binary to transfrom reduction");
    case ISD::ADD:
      return RISCVISD::VECREDUCE_ADD_VL;
    case ISD::UMAX:
      return RISCVISD::VECREDUCE_UMAX_VL;
    case ISD::SMAX:
      return RISCVISD::VECREDUCE_SMAX_VL;
    case ISD::UMIN:
      return RISCVISD::VECREDUCE_UMIN_VL;
    case ISD::SMIN:
      return RISCVISD::VECREDUCE_SMIN_VL;
    case ISD::AND:
      return RISCVISD::VECREDUCE_AND_VL;
    case ISD::OR:
      return RISCVISD::VECREDUCE_OR_VL;
    case ISD::XOR:
      return RISCVISD::VECREDUCE_XOR_VL;
    case ISD::FADD:
      return RISCVISD::VECREDUCE_FADD_VL;
    case ISD::FMAXNUM:
      return RISCVISD::VECREDUCE_FMAX_VL;
    case ISD::FMINNUM:
      return RISCVISD::VECREDUCE_FMIN_VL;
    }
  };

  // True if V is (extract_vector_elt (reduction-matching-Opc ...), 0).
  auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
    return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
           isNullConstant(V.getOperand(1)) &&
           V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
  };

  // Find which operand of N, if either, is the extracted reduction result.
  unsigned Opc = N->getOpcode();
  unsigned ReduceIdx;
  if (IsReduction(N->getOperand(0), Opc))
    ReduceIdx = 0;
  else if (IsReduction(N->getOperand(1), Opc))
    ReduceIdx = 1;
  else
    return SDValue();

  // Skip if FADD disallows reassociation but the combiner needs it: folding
  // the addend into the start value reassociates the FP additions.
  if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
    return SDValue();

  SDValue Extract = N->getOperand(ReduceIdx);
  SDValue Reduce = Extract.getOperand(0);
  // Only rewrite the reduction when this combine is its sole user.
  if (!Reduce.hasOneUse())
    return SDValue();

  // Operand 2 of the reduction node is the splatted scalar start value.
  SDValue ScalarV = Reduce.getOperand(2);

  // Make sure that ScalarV is a splat with VL=1.
  if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
    return SDValue();

  // For these splat nodes, operand 2 is the VL operand.
  if (!isOneConstant(ScalarV.getOperand(2)))
    return SDValue();

  // TODO: Deal with value other than neutral element.
  auto IsRVVNeutralElement = [Opc, &DAG](SDNode *N, SDValue V) {
    // For FADD the neutral element is -0.0, but +0.0 is also acceptable when
    // signed zeros may be ignored.
    if (Opc == ISD::FADD && N->getFlags().hasNoSignedZeros() &&
        isNullFPConstant(V))
      return true;
    return DAG.getNeutralElement(Opc, SDLoc(V), V.getSimpleValueType(),
                                 N->getFlags()) == V;
  };

  // Check the scalar of ScalarV is neutral element
  if (!IsRVVNeutralElement(N, ScalarV.getOperand(1)))
    return SDValue();

  if (!ScalarV.hasOneUse())
    return SDValue();

  EVT SplatVT = ScalarV.getValueType();
  // The new start value is N's other operand.
  SDValue NewStart = N->getOperand(1 - ReduceIdx);
  unsigned SplatOpc = RISCVISD::VFMV_S_F_VL;
  if (SplatVT.isInteger()) {
    auto *C = dyn_cast<ConstantSDNode>(NewStart.getNode());
    // VMV_V_X_VL is only used for nonzero simm5 constants — presumably so
    // isel can emit vmv.v.i; otherwise fall back to VMV_S_X_VL.
    if (!C || C->isZero() || !isInt<5>(C->getSExtValue()))
      SplatOpc = RISCVISD::VMV_S_X_VL;
    else
      SplatOpc = RISCVISD::VMV_V_X_VL;
  }

  // Rebuild: splat the new start value, feed it into a new reduction node,
  // and re-extract element 0 with the original extract's type.
  SDValue NewScalarV =
      DAG.getNode(SplatOpc, SDLoc(N), SplatVT, ScalarV.getOperand(0), NewStart,
                  ScalarV.getOperand(2));
  SDValue NewReduce =
      DAG.getNode(Reduce.getOpcode(), SDLoc(Reduce), Reduce.getValueType(),
                  Reduce.getOperand(0), Reduce.getOperand(1), NewScalarV,
                  Reduce.getOperand(3), Reduce.getOperand(4));
  return DAG.getNode(Extract.getOpcode(), SDLoc(Extract),
                     Extract.getValueType(), NewReduce, Extract.getOperand(1));
}
7455 
7456 // Match the following pattern as a GREVI(W) operation
7457 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
7458 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
7459                                const RISCVSubtarget &Subtarget) {
7460   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
7461   EVT VT = Op.getValueType();
7462 
7463   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
7464     auto LHS = matchGREVIPat(Op.getOperand(0));
7465     auto RHS = matchGREVIPat(Op.getOperand(1));
7466     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
7467       SDLoc DL(Op);
7468       return DAG.getNode(RISCVISD::GREV, DL, VT, LHS->Op,
7469                          DAG.getConstant(LHS->ShAmt, DL, VT));
7470     }
7471   }
7472   return SDValue();
7473 }
7474 
7475 // Matches any the following pattern as a GORCI(W) operation
7476 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
7477 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
7478 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
7479 // Note that with the variant of 3.,
7480 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
7481 // the inner pattern will first be matched as GREVI and then the outer
7482 // pattern will be matched to GORC via the first rule above.
7483 // 4.  (or (rotl/rotr x, bitwidth/2), x)
7484 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
7485                                const RISCVSubtarget &Subtarget) {
7486   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
7487   EVT VT = Op.getValueType();
7488 
7489   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
7490     SDLoc DL(Op);
7491     SDValue Op0 = Op.getOperand(0);
7492     SDValue Op1 = Op.getOperand(1);
7493 
7494     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
7495       if (Reverse.getOpcode() == RISCVISD::GREV && Reverse.getOperand(0) == X &&
7496           isa<ConstantSDNode>(Reverse.getOperand(1)) &&
7497           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
7498         return DAG.getNode(RISCVISD::GORC, DL, VT, X, Reverse.getOperand(1));
7499       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
7500       if ((Reverse.getOpcode() == ISD::ROTL ||
7501            Reverse.getOpcode() == ISD::ROTR) &&
7502           Reverse.getOperand(0) == X &&
7503           isa<ConstantSDNode>(Reverse.getOperand(1))) {
7504         uint64_t RotAmt = Reverse.getConstantOperandVal(1);
7505         if (RotAmt == (VT.getSizeInBits() / 2))
7506           return DAG.getNode(RISCVISD::GORC, DL, VT, X,
7507                              DAG.getConstant(RotAmt, DL, VT));
7508       }
7509       return SDValue();
7510     };
7511 
7512     // Check for either commutable permutation of (or (GREVI x, shamt), x)
7513     if (SDValue V = MatchOROfReverse(Op0, Op1))
7514       return V;
7515     if (SDValue V = MatchOROfReverse(Op1, Op0))
7516       return V;
7517 
7518     // OR is commutable so canonicalize its OR operand to the left
7519     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
7520       std::swap(Op0, Op1);
7521     if (Op0.getOpcode() != ISD::OR)
7522       return SDValue();
7523     SDValue OrOp0 = Op0.getOperand(0);
7524     SDValue OrOp1 = Op0.getOperand(1);
7525     auto LHS = matchGREVIPat(OrOp0);
7526     // OR is commutable so swap the operands and try again: x might have been
7527     // on the left
7528     if (!LHS) {
7529       std::swap(OrOp0, OrOp1);
7530       LHS = matchGREVIPat(OrOp0);
7531     }
7532     auto RHS = matchGREVIPat(Op1);
7533     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
7534       return DAG.getNode(RISCVISD::GORC, DL, VT, LHS->Op,
7535                          DAG.getConstant(LHS->ShAmt, DL, VT));
7536     }
7537   }
7538   return SDValue();
7539 }
7540 
7541 // Matches any of the following bit-manipulation patterns:
7542 //   (and (shl x, 1), (0x22222222 << 1))
7543 //   (and (srl x, 1), 0x22222222)
7544 //   (shl (and x, 0x22222222), 1)
7545 //   (srl (and x, (0x22222222 << 1)), 1)
7546 // where the shift amount and mask may vary thus:
7547 //   [1]  = 0x22222222 / 0x44444444
7548 //   [2]  = 0x0C0C0C0C / 0x3C3C3C3C
7549 //   [4]  = 0x00F000F0 / 0x0F000F00
7550 //   [8]  = 0x0000FF00 / 0x00FF0000
7551 //   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
7552 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
7553   // These are the unshifted masks which we use to match bit-manipulation
7554   // patterns. They may be shifted left in certain circumstances.
7555   static const uint64_t BitmanipMasks[] = {
7556       0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
7557       0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
7558 
7559   return matchRISCVBitmanipPat(Op, BitmanipMasks);
7560 }
7561 
7562 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x)
7563 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
7564                                const RISCVSubtarget &Subtarget) {
7565   assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
7566   EVT VT = Op.getValueType();
7567 
7568   if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
7569     return SDValue();
7570 
7571   SDValue Op0 = Op.getOperand(0);
7572   SDValue Op1 = Op.getOperand(1);
7573 
7574   // Or is commutable so canonicalize the second OR to the LHS.
7575   if (Op0.getOpcode() != ISD::OR)
7576     std::swap(Op0, Op1);
7577   if (Op0.getOpcode() != ISD::OR)
7578     return SDValue();
7579 
7580   // We found an inner OR, so our operands are the operands of the inner OR
7581   // and the other operand of the outer OR.
7582   SDValue A = Op0.getOperand(0);
7583   SDValue B = Op0.getOperand(1);
7584   SDValue C = Op1;
7585 
7586   auto Match1 = matchSHFLPat(A);
7587   auto Match2 = matchSHFLPat(B);
7588 
7589   // If neither matched, we failed.
7590   if (!Match1 && !Match2)
7591     return SDValue();
7592 
7593   // We had at least one match. if one failed, try the remaining C operand.
7594   if (!Match1) {
7595     std::swap(A, C);
7596     Match1 = matchSHFLPat(A);
7597     if (!Match1)
7598       return SDValue();
7599   } else if (!Match2) {
7600     std::swap(B, C);
7601     Match2 = matchSHFLPat(B);
7602     if (!Match2)
7603       return SDValue();
7604   }
7605   assert(Match1 && Match2);
7606 
7607   // Make sure our matches pair up.
7608   if (!Match1->formsPairWith(*Match2))
7609     return SDValue();
7610 
7611   // All the remains is to make sure C is an AND with the same input, that masks
7612   // out the bits that are being shuffled.
7613   if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
7614       C.getOperand(0) != Match1->Op)
7615     return SDValue();
7616 
7617   uint64_t Mask = C.getConstantOperandVal(1);
7618 
7619   static const uint64_t BitmanipMasks[] = {
7620       0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
7621       0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
7622   };
7623 
7624   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
7625   unsigned MaskIdx = Log2_32(Match1->ShAmt);
7626   uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
7627 
7628   if (Mask != ExpMask)
7629     return SDValue();
7630 
7631   SDLoc DL(Op);
7632   return DAG.getNode(RISCVISD::SHFL, DL, VT, Match1->Op,
7633                      DAG.getConstant(Match1->ShAmt, DL, VT));
7634 }
7635 
7636 // Optimize (add (shl x, c0), (shl y, c1)) ->
7637 //          (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
7638 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
7639                                   const RISCVSubtarget &Subtarget) {
7640   // Perform this optimization only in the zba extension.
7641   if (!Subtarget.hasStdExtZba())
7642     return SDValue();
7643 
7644   // Skip for vector types and larger types.
7645   EVT VT = N->getValueType(0);
7646   if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
7647     return SDValue();
7648 
7649   // The two operand nodes must be SHL and have no other use.
7650   SDValue N0 = N->getOperand(0);
7651   SDValue N1 = N->getOperand(1);
7652   if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
7653       !N0->hasOneUse() || !N1->hasOneUse())
7654     return SDValue();
7655 
7656   // Check c0 and c1.
7657   auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
7658   auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
7659   if (!N0C || !N1C)
7660     return SDValue();
7661   int64_t C0 = N0C->getSExtValue();
7662   int64_t C1 = N1C->getSExtValue();
7663   if (C0 <= 0 || C1 <= 0)
7664     return SDValue();
7665 
7666   // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
7667   int64_t Bits = std::min(C0, C1);
7668   int64_t Diff = std::abs(C0 - C1);
7669   if (Diff != 1 && Diff != 2 && Diff != 3)
7670     return SDValue();
7671 
7672   // Build nodes.
7673   SDLoc DL(N);
7674   SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
7675   SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
7676   SDValue NA0 =
7677       DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
7678   SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
7679   return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
7680 }
7681 
// Combine
// ROTR ((GREVI x, 24), 16) -> (GREVI x, 8) for RV32
// ROTL ((GREVI x, 24), 16) -> (GREVI x, 8) for RV32
// ROTR ((GREVI x, 56), 32) -> (GREVI x, 24) for RV64
// ROTL ((GREVI x, 56), 32) -> (GREVI x, 24) for RV64
// RORW ((GREVI x, 24), 16) -> (GREVIW x, 8) for RV64
// ROLW ((GREVI x, 24), 16) -> (GREVIW x, 8) for RV64
// The grev patterns represent BSWAP.
// FIXME: This can be generalized to any GREV. We just need to toggle the MSB
// off the grev.
static SDValue combineROTR_ROTL_RORW_ROLW(SDNode *N, SelectionDAG &DAG,
                                          const RISCVSubtarget &Subtarget) {
  bool IsWInstruction =
      N->getOpcode() == RISCVISD::RORW || N->getOpcode() == RISCVISD::ROLW;
  assert((N->getOpcode() == ISD::ROTR || N->getOpcode() == ISD::ROTL ||
          IsWInstruction) &&
         "Unexpected opcode!");
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Only applies with Zbp, when the rotate's input is a GREV node.
  if (!Subtarget.hasStdExtZbp() || Src.getOpcode() != RISCVISD::GREV)
    return SDValue();

  // Both the rotate amount and the grev control must be constants.
  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
      !isa<ConstantSDNode>(Src.getOperand(1)))
    return SDValue();

  // The W-form rotates operate on a fixed 32-bit width.
  unsigned BitWidth = IsWInstruction ? 32 : VT.getSizeInBits();
  assert(isPowerOf2_32(BitWidth) && "Expected a power of 2");

  // Needs to be a rotate by half the bitwidth for ROTR/ROTL or by 16 for
  // RORW/ROLW. And the grev should be the encoding for bswap for this width.
  unsigned ShAmt1 = N->getConstantOperandVal(1);
  unsigned ShAmt2 = Src.getConstantOperandVal(1);
  if (BitWidth < 32 || ShAmt1 != (BitWidth / 2) || ShAmt2 != (BitWidth - 8))
    return SDValue();

  Src = Src.getOperand(0);

  // A rotate by half the bitwidth equals GREV with only the MSB of the
  // control set, so XOR merges the two stages into a single grev amount
  // (toggling the MSB of the bswap encoding).
  unsigned CombinedShAmt = ShAmt1 ^ ShAmt2;
  if (CombinedShAmt == 0)
    return Src;

  SDValue Res = DAG.getNode(
      RISCVISD::GREV, DL, VT, Src,
      DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
  if (!IsWInstruction)
    return Res;

  // Sign extend the result to match the behavior of the rotate. This will be
  // selected to GREVIW in isel.
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Res,
                     DAG.getValueType(MVT::i32));
}
7738 
7739 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
7740 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
7741 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
7742 // not undo itself, but they are redundant.
7743 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
7744   bool IsGORC = N->getOpcode() == RISCVISD::GORC;
7745   assert((IsGORC || N->getOpcode() == RISCVISD::GREV) && "Unexpected opcode");
7746   SDValue Src = N->getOperand(0);
7747 
7748   if (Src.getOpcode() != N->getOpcode())
7749     return SDValue();
7750 
7751   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
7752       !isa<ConstantSDNode>(Src.getOperand(1)))
7753     return SDValue();
7754 
7755   unsigned ShAmt1 = N->getConstantOperandVal(1);
7756   unsigned ShAmt2 = Src.getConstantOperandVal(1);
7757   Src = Src.getOperand(0);
7758 
7759   unsigned CombinedShAmt;
7760   if (IsGORC)
7761     CombinedShAmt = ShAmt1 | ShAmt2;
7762   else
7763     CombinedShAmt = ShAmt1 ^ ShAmt2;
7764 
7765   if (CombinedShAmt == 0)
7766     return Src;
7767 
7768   SDLoc DL(N);
7769   return DAG.getNode(
7770       N->getOpcode(), DL, N->getValueType(0), Src,
7771       DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
7772 }
7773 
// Combine a constant select operand into its use:
//
// (and (select cond, -1, c), x)
//   -> (select cond, x, (and x, c))  [AllOnes=1]
// (or  (select cond, 0, c), x)
//   -> (select cond, x, (or x, c))  [AllOnes=0]
// (xor (select cond, 0, c), x)
//   -> (select cond, x, (xor x, c))  [AllOnes=0]
// (add (select cond, 0, c), x)
//   -> (select cond, x, (add x, c))  [AllOnes=0]
// (sub x, (select cond, 0, c))
//   -> (select cond, x, (sub x, c))  [AllOnes=0]
//
// N is the binary operation, Slct the select operand, OtherOp the other
// operand; AllOnes selects -1 (true) or 0 (false) as the identity constant.
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                                   SelectionDAG &DAG, bool AllOnes) {
  EVT VT = N->getValueType(0);

  // Skip vectors.
  if (VT.isVector())
    return SDValue();

  // Only handle single-use SELECT/SELECT_CC so we don't duplicate the select.
  if ((Slct.getOpcode() != ISD::SELECT &&
       Slct.getOpcode() != RISCVISD::SELECT_CC) ||
      !Slct.hasOneUse())
    return SDValue();

  // True if N is the identity constant chosen by AllOnes.
  auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
    return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
  };

  bool SwapSelectOps;
  // RISCVISD::SELECT_CC carries (lhs, rhs, cc) before its true/false values,
  // so its value operands start at index 3 instead of 1.
  unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
  SDValue TrueVal = Slct.getOperand(1 + OpOffset);
  SDValue FalseVal = Slct.getOperand(2 + OpOffset);
  SDValue NonConstantVal;
  if (isZeroOrAllOnes(TrueVal, AllOnes)) {
    SwapSelectOps = false;
    NonConstantVal = FalseVal;
  } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
    SwapSelectOps = true;
    NonConstantVal = TrueVal;
  } else
    return SDValue();

  // Slct is now known to be the desired identity constant when CC is true.
  TrueVal = OtherOp;
  FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
  // Unless SwapSelectOps says the condition should be false.
  if (SwapSelectOps)
    std::swap(TrueVal, FalseVal);

  if (Slct.getOpcode() == RISCVISD::SELECT_CC)
    return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
                       {Slct.getOperand(0), Slct.getOperand(1),
                        Slct.getOperand(2), TrueVal, FalseVal});

  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
                     {Slct.getOperand(0), TrueVal, FalseVal});
}
7832 
7833 // Attempt combineSelectAndUse on each operand of a commutative operator N.
7834 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
7835                                               bool AllOnes) {
7836   SDValue N0 = N->getOperand(0);
7837   SDValue N1 = N->getOperand(1);
7838   if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes))
7839     return Result;
7840   if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes))
7841     return Result;
7842   return SDValue();
7843 }
7844 
// Transform (add (mul x, c0), c1) ->
//           (add (mul (add x, c1/c0), c0), c1%c0).
// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
// that should be excluded is when c0*(c1/c0) is simm12, which will lead
// to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
// lead to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
// lead to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (mul (add x, c1/c0), c0).
// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  // Skip for vector types and larger types.
  EVT VT = N->getValueType(0);
  if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
    return SDValue();
  // The first operand node must be a MUL and has no other use.
  SDValue N0 = N->getOperand(0);
  if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
    return SDValue();
  // Check if c0 and c1 match above conditions.
  auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!N0C || !N1C)
    return SDValue();
  // If N0C has multiple uses it's possible one of the cases in
  // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
  // in an infinite loop.
  if (!N0C->hasOneUse())
    return SDValue();
  int64_t C0 = N0C->getSExtValue();
  int64_t C1 = N1C->getSExtValue();
  int64_t CA, CB;
  // Multiplying by -1, 0 or 1 is folded elsewhere, and if c1 already fits in
  // a simm12 there is nothing to gain from rewriting the addend.
  if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
    return SDValue();
  // Search for proper CA (non-zero) and CB that both are simm12.
  // The three branches correspond to the three rewrites documented above;
  // each also rejects the corner case where c0*CA is itself simm12 (that
  // form would be re-split by DAGCombiner, looping forever).
  if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
      !isInt<12>(C0 * (C1 / C0))) {
    CA = C1 / C0;
    CB = C1 % C0;
  } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
             isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
    CA = C1 / C0 + 1;
    CB = C1 % C0 - C0;
  } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
             isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
    CA = C1 / C0 - 1;
    CB = C1 % C0 + C0;
  } else
    return SDValue();
  // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
  SDLoc DL(N);
  SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
                             DAG.getConstant(CA, DL, VT));
  SDValue New1 =
      DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
  return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
}
7911 
7912 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
7913                                  const RISCVSubtarget &Subtarget) {
7914   if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
7915     return V;
7916   if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
7917     return V;
7918   if (SDValue V = combineBinOpToReduce(N, DAG))
7919     return V;
7920   // fold (add (select lhs, rhs, cc, 0, y), x) ->
7921   //      (select lhs, rhs, cc, x, (add x, y))
7922   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
7923 }
7924 
7925 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG) {
7926   // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
7927   //      (select lhs, rhs, cc, x, (sub x, y))
7928   SDValue N0 = N->getOperand(0);
7929   SDValue N1 = N->getOperand(1);
7930   return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false);
7931 }
7932 
7933 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
7934                                  const RISCVSubtarget &Subtarget) {
7935   SDValue N0 = N->getOperand(0);
7936   // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
7937   // extending X. This is safe since we only need the LSB after the shift and
7938   // shift amounts larger than 31 would produce poison. If we wait until
7939   // type legalization, we'll create RISCVISD::SRLW and we can't recover it
7940   // to use a BEXT instruction.
7941   if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
7942       N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
7943       N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
7944       N0.hasOneUse()) {
7945     SDLoc DL(N);
7946     SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
7947     SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
7948     SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
7949     SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
7950                               DAG.getConstant(1, DL, MVT::i64));
7951     return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
7952   }
7953 
7954   if (SDValue V = combineBinOpToReduce(N, DAG))
7955     return V;
7956 
7957   // fold (and (select lhs, rhs, cc, -1, y), x) ->
7958   //      (select lhs, rhs, cc, x, (and x, y))
7959   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true);
7960 }
7961 
7962 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
7963                                 const RISCVSubtarget &Subtarget) {
7964   if (Subtarget.hasStdExtZbp()) {
7965     if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget))
7966       return GREV;
7967     if (auto GORC = combineORToGORC(SDValue(N, 0), DAG, Subtarget))
7968       return GORC;
7969     if (auto SHFL = combineORToSHFL(SDValue(N, 0), DAG, Subtarget))
7970       return SHFL;
7971   }
7972 
7973   if (SDValue V = combineBinOpToReduce(N, DAG))
7974     return V;
7975   // fold (or (select cond, 0, y), x) ->
7976   //      (select cond, x, (or x, y))
7977   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
7978 }
7979 
7980 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) {
7981   SDValue N0 = N->getOperand(0);
7982   SDValue N1 = N->getOperand(1);
7983 
7984   // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
7985   // NOTE: Assumes ROL being legal means ROLW is legal.
7986   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7987   if (N0.getOpcode() == RISCVISD::SLLW &&
7988       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
7989       TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
7990     SDLoc DL(N);
7991     return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
7992                        DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
7993   }
7994 
7995   if (SDValue V = combineBinOpToReduce(N, DAG))
7996     return V;
7997   // fold (xor (select cond, 0, y), x) ->
7998   //      (select cond, x, (xor x, y))
7999   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
8000 }
8001 
8002 static SDValue
8003 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
8004                                 const RISCVSubtarget &Subtarget) {
8005   SDValue Src = N->getOperand(0);
8006   EVT VT = N->getValueType(0);
8007 
8008   // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
8009   if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
8010       cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
8011     return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
8012                        Src.getOperand(0));
8013 
8014   // Fold (i64 (sext_inreg (abs X), i32)) ->
8015   // (i64 (smax (sext_inreg (neg X), i32), X)) if X has more than 32 sign bits.
8016   // The (sext_inreg (neg X), i32) will be selected to negw by isel. This
8017   // pattern occurs after type legalization of (i32 (abs X)) on RV64 if the user
8018   // of the (i32 (abs X)) is a sext or setcc or something else that causes type
8019   // legalization to add a sext_inreg after the abs. The (i32 (abs X)) will have
8020   // been type legalized to (i64 (abs (sext_inreg X, i32))), but the sext_inreg
8021   // may get combined into an earlier operation so we need to use
8022   // ComputeNumSignBits.
8023   // NOTE: (i64 (sext_inreg (abs X), i32)) can also be created for
8024   // (i64 (ashr (shl (abs X), 32), 32)) without any type legalization so
8025   // we can't assume that X has 33 sign bits. We must check.
8026   if (Subtarget.hasStdExtZbb() && Subtarget.is64Bit() &&
8027       Src.getOpcode() == ISD::ABS && Src.hasOneUse() && VT == MVT::i64 &&
8028       cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32 &&
8029       DAG.ComputeNumSignBits(Src.getOperand(0)) > 32) {
8030     SDLoc DL(N);
8031     SDValue Freeze = DAG.getFreeze(Src.getOperand(0));
8032     SDValue Neg =
8033         DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, MVT::i64), Freeze);
8034     Neg = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Neg,
8035                       DAG.getValueType(MVT::i32));
8036     return DAG.getNode(ISD::SMAX, DL, MVT::i64, Freeze, Neg);
8037   }
8038 
8039   return SDValue();
8040 }
8041 
// Try to form vwadd(u).wv/wx or vwsub(u).wv/wx. It might later be optimized to
// vwadd(u).vv/vx or vwsub(u).vv/vx.
static SDValue combineADDSUB_VLToVWADDSUB_VL(SDNode *N, SelectionDAG &DAG,
                                             bool Commute = false) {
  assert((N->getOpcode() == RISCVISD::ADD_VL ||
          N->getOpcode() == RISCVISD::SUB_VL) &&
         "Unexpected opcode");
  bool IsAdd = N->getOpcode() == RISCVISD::ADD_VL;
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  // The caller may ask us to look at the operands in the commuted order so
  // the extend ends up on the RHS (the ".wv" wide operand stays on the LHS).
  if (Commute)
    std::swap(Op0, Op1);

  MVT VT = N->getSimpleValueType(0);

  // Determine the narrow size for a widening add/sub.
  unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
  MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
                                  VT.getVectorElementCount());

  // VL nodes carry their mask and VL as trailing operands.
  SDValue Mask = N->getOperand(2);
  SDValue VL = N->getOperand(3);

  SDLoc DL(N);

  // If the RHS is a sext or zext, we can form a widening op. The extend's
  // mask and VL must match the add/sub's so the combined op is equivalent.
  if ((Op1.getOpcode() == RISCVISD::VZEXT_VL ||
       Op1.getOpcode() == RISCVISD::VSEXT_VL) &&
      Op1.hasOneUse() && Op1.getOperand(1) == Mask && Op1.getOperand(2) == VL) {
    unsigned ExtOpc = Op1.getOpcode();
    Op1 = Op1.getOperand(0);
    // Re-introduce narrower extends if needed. The widening op only extends
    // from half the element width, so a wider-than-2x extend is split.
    if (Op1.getValueType() != NarrowVT)
      Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);

    // Pick the signed/unsigned ".w" form matching the extend kind.
    unsigned WOpc;
    if (ExtOpc == RISCVISD::VSEXT_VL)
      WOpc = IsAdd ? RISCVISD::VWADD_W_VL : RISCVISD::VWSUB_W_VL;
    else
      WOpc = IsAdd ? RISCVISD::VWADDU_W_VL : RISCVISD::VWSUBU_W_VL;

    return DAG.getNode(WOpc, DL, VT, Op0, Op1, Mask, VL);
  }

  // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
  // sext/zext?

  return SDValue();
}
8091 
// Try to convert vwadd(u).wv/wx or vwsub(u).wv/wx to vwadd(u).vv/vx or
// vwsub(u).vv/vx.
static SDValue combineVWADD_W_VL_VWSUB_W_VL(SDNode *N, SelectionDAG &DAG) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Mask = N->getOperand(2);
  SDValue VL = N->getOperand(3);

  MVT VT = N->getSimpleValueType(0);
  // Op1 is already the narrow operand of the ".w" form.
  MVT NarrowVT = Op1.getSimpleValueType();
  unsigned NarrowSize = NarrowVT.getScalarSizeInBits();

  // Map the ".w" opcode to the corresponding ".v" opcode.
  unsigned VOpc;
  switch (N->getOpcode()) {
  default: llvm_unreachable("Unexpected opcode");
  case RISCVISD::VWADD_W_VL:  VOpc = RISCVISD::VWADD_VL;  break;
  case RISCVISD::VWSUB_W_VL:  VOpc = RISCVISD::VWSUB_VL;  break;
  case RISCVISD::VWADDU_W_VL: VOpc = RISCVISD::VWADDU_VL; break;
  case RISCVISD::VWSUBU_W_VL: VOpc = RISCVISD::VWSUBU_VL; break;
  }

  bool IsSigned = N->getOpcode() == RISCVISD::VWADD_W_VL ||
                  N->getOpcode() == RISCVISD::VWSUB_W_VL;

  SDLoc DL(N);

  // If the LHS is a sext or zext, we can narrow this op to the same size as
  // the RHS. The extend kind must match the op's signedness, and the
  // extend's mask/VL must match ours.
  if (((Op0.getOpcode() == RISCVISD::VZEXT_VL && !IsSigned) ||
       (Op0.getOpcode() == RISCVISD::VSEXT_VL && IsSigned)) &&
      Op0.hasOneUse() && Op0.getOperand(1) == Mask && Op0.getOperand(2) == VL) {
    unsigned ExtOpc = Op0.getOpcode();
    Op0 = Op0.getOperand(0);
    // Re-introduce narrower extends if needed.
    if (Op0.getValueType() != NarrowVT)
      Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
    return DAG.getNode(VOpc, DL, VT, Op0, Op1, Mask, VL);
  }

  bool IsAdd = N->getOpcode() == RISCVISD::VWADD_W_VL ||
               N->getOpcode() == RISCVISD::VWADDU_W_VL;

  // Look for splats on the left hand side of a vwadd(u).wv. We might be able
  // to commute and use a vwadd(u).vx instead. Only add commutes, so subtract
  // is excluded here.
  if (IsAdd && Op0.getOpcode() == RISCVISD::VMV_V_X_VL &&
      Op0.getOperand(0).isUndef() && Op0.getOperand(2) == VL) {
    Op0 = Op0.getOperand(1);

    // See if have enough sign bits or zero bits in the scalar to use a
    // widening add/sub by splatting to smaller element size.
    unsigned EltBits = VT.getScalarSizeInBits();
    unsigned ScalarBits = Op0.getValueSizeInBits();
    // Make sure we're getting all element bits from the scalar register.
    // FIXME: Support implicit sign extension of vmv.v.x?
    if (ScalarBits < EltBits)
      return SDValue();

    if (IsSigned) {
      // Signed: the scalar must already look sign-extended from NarrowSize.
      if (DAG.ComputeNumSignBits(Op0) <= (ScalarBits - NarrowSize))
        return SDValue();
    } else {
      // Unsigned: the bits above NarrowSize must be known zero. This APInt
      // shadows the SDValue Mask only within this scope.
      APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
      if (!DAG.MaskedValueIsZero(Op0, Mask))
        return SDValue();
    }

    // Re-splat at the narrow element type and emit the commuted ".vv" form
    // (vector operand first, narrowed splat second).
    Op0 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
                      DAG.getUNDEF(NarrowVT), Op0, VL);
    return DAG.getNode(VOpc, DL, VT, Op1, Op0, Mask, VL);
  }

  return SDValue();
}
8165 
// Try to form VWMUL, VWMULU or VWMULSU.
// TODO: Support VWMULSU.vx with a sign extend Op and a splat of scalar Op.
static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
                                       bool Commute) {
  assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  // The caller tries both operand orders; Commute selects which operand is
  // treated as the extended LHS.
  if (Commute)
    std::swap(Op0, Op1);

  // Classify the LHS extend; vwmulsu needs a sext LHS and zext RHS.
  bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
  bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
  bool IsVWMULSU = IsSignExt && Op1.getOpcode() == RISCVISD::VZEXT_VL;
  if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
    return SDValue();

  SDValue Mask = N->getOperand(2);
  SDValue VL = N->getOperand(3);

  // Make sure the mask and VL match.
  if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL)
    return SDValue();

  MVT VT = N->getSimpleValueType(0);

  // Determine the narrow size for a widening multiply.
  unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
  MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
                                  VT.getVectorElementCount());

  SDLoc DL(N);

  // See if the other operand is the same opcode.
  if (IsVWMULSU || Op0.getOpcode() == Op1.getOpcode()) {
    if (!Op1.hasOneUse())
      return SDValue();

    // Make sure the mask and VL match.
    if (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
      return SDValue();

    Op1 = Op1.getOperand(0);
  } else if (Op1.getOpcode() == RISCVISD::VMV_V_X_VL) {
    // The operand is a splat of a scalar.

    // The passthru must be undef for tail agnostic
    if (!Op1.getOperand(0).isUndef())
      return SDValue();
    // The VL must be the same.
    if (Op1.getOperand(2) != VL)
      return SDValue();

    // Get the scalar value.
    Op1 = Op1.getOperand(1);

    // See if have enough sign bits or zero bits in the scalar to use a
    // widening multiply by splatting to smaller element size.
    unsigned EltBits = VT.getScalarSizeInBits();
    unsigned ScalarBits = Op1.getValueSizeInBits();
    // Make sure we're getting all element bits from the scalar register.
    // FIXME: Support implicit sign extension of vmv.v.x?
    if (ScalarBits < EltBits)
      return SDValue();

    // If the LHS is a sign extend, try to use vwmul.
    if (IsSignExt && DAG.ComputeNumSignBits(Op1) > (ScalarBits - NarrowSize)) {
      // Can use vwmul.
    } else {
      // Otherwise try to use vwmulu or vwmulsu. Requires the high bits of
      // the scalar to be known zero. This APInt shadows the SDValue Mask
      // only within this scope.
      APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
      if (DAG.MaskedValueIsZero(Op1, Mask))
        IsVWMULSU = IsSignExt;
      else
        return SDValue();
    }

    // Re-splat the scalar at the narrow element type.
    Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
                      DAG.getUNDEF(NarrowVT), Op1, VL);
  } else
    return SDValue();

  Op0 = Op0.getOperand(0);

  // Re-introduce narrower extends if needed.
  unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
  if (Op0.getValueType() != NarrowVT)
    Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
  // vwmulsu requires second operand to be zero extended.
  ExtOpc = IsVWMULSU ? RISCVISD::VZEXT_VL : ExtOpc;
  if (Op1.getValueType() != NarrowVT)
    Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);

  unsigned WMulOpc = RISCVISD::VWMULSU_VL;
  if (!IsVWMULSU)
    WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
  return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
}
8263 
8264 static RISCVFPRndMode::RoundingMode matchRoundingOp(SDValue Op) {
8265   switch (Op.getOpcode()) {
8266   case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE;
8267   case ISD::FTRUNC:     return RISCVFPRndMode::RTZ;
8268   case ISD::FFLOOR:     return RISCVFPRndMode::RDN;
8269   case ISD::FCEIL:      return RISCVFPRndMode::RUP;
8270   case ISD::FROUND:     return RISCVFPRndMode::RMM;
8271   }
8272 
8273   return RISCVFPRndMode::Invalid;
8274 }
8275 
8276 // Fold
8277 //   (fp_to_int (froundeven X)) -> fcvt X, rne
8278 //   (fp_to_int (ftrunc X))     -> fcvt X, rtz
8279 //   (fp_to_int (ffloor X))     -> fcvt X, rdn
8280 //   (fp_to_int (fceil X))      -> fcvt X, rup
8281 //   (fp_to_int (fround X))     -> fcvt X, rmm
8282 static SDValue performFP_TO_INTCombine(SDNode *N,
8283                                        TargetLowering::DAGCombinerInfo &DCI,
8284                                        const RISCVSubtarget &Subtarget) {
8285   SelectionDAG &DAG = DCI.DAG;
8286   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8287   MVT XLenVT = Subtarget.getXLenVT();
8288 
8289   // Only handle XLen or i32 types. Other types narrower than XLen will
8290   // eventually be legalized to XLenVT.
8291   EVT VT = N->getValueType(0);
8292   if (VT != MVT::i32 && VT != XLenVT)
8293     return SDValue();
8294 
8295   SDValue Src = N->getOperand(0);
8296 
8297   // Ensure the FP type is also legal.
8298   if (!TLI.isTypeLegal(Src.getValueType()))
8299     return SDValue();
8300 
8301   // Don't do this for f16 with Zfhmin and not Zfh.
8302   if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
8303     return SDValue();
8304 
8305   RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
8306   if (FRM == RISCVFPRndMode::Invalid)
8307     return SDValue();
8308 
8309   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
8310 
8311   unsigned Opc;
8312   if (VT == XLenVT)
8313     Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
8314   else
8315     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
8316 
8317   SDLoc DL(N);
8318   SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
8319                                 DAG.getTargetConstant(FRM, DL, XLenVT));
8320   return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
8321 }
8322 
8323 // Fold
8324 //   (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
8325 //   (fp_to_int_sat (ftrunc X))     -> (select X == nan, 0, (fcvt X, rtz))
8326 //   (fp_to_int_sat (ffloor X))     -> (select X == nan, 0, (fcvt X, rdn))
8327 //   (fp_to_int_sat (fceil X))      -> (select X == nan, 0, (fcvt X, rup))
8328 //   (fp_to_int_sat (fround X))     -> (select X == nan, 0, (fcvt X, rmm))
8329 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
8330                                        TargetLowering::DAGCombinerInfo &DCI,
8331                                        const RISCVSubtarget &Subtarget) {
8332   SelectionDAG &DAG = DCI.DAG;
8333   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8334   MVT XLenVT = Subtarget.getXLenVT();
8335 
8336   // Only handle XLen types. Other types narrower than XLen will eventually be
8337   // legalized to XLenVT.
8338   EVT DstVT = N->getValueType(0);
8339   if (DstVT != XLenVT)
8340     return SDValue();
8341 
8342   SDValue Src = N->getOperand(0);
8343 
8344   // Ensure the FP type is also legal.
8345   if (!TLI.isTypeLegal(Src.getValueType()))
8346     return SDValue();
8347 
8348   // Don't do this for f16 with Zfhmin and not Zfh.
8349   if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
8350     return SDValue();
8351 
8352   EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
8353 
8354   RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
8355   if (FRM == RISCVFPRndMode::Invalid)
8356     return SDValue();
8357 
8358   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
8359 
8360   unsigned Opc;
8361   if (SatVT == DstVT)
8362     Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
8363   else if (DstVT == MVT::i64 && SatVT == MVT::i32)
8364     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
8365   else
8366     return SDValue();
8367   // FIXME: Support other SatVTs by clamping before or after the conversion.
8368 
8369   Src = Src.getOperand(0);
8370 
8371   SDLoc DL(N);
8372   SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
8373                                 DAG.getTargetConstant(FRM, DL, XLenVT));
8374 
8375   // RISCV FP-to-int conversions saturate to the destination register size, but
8376   // don't produce 0 for nan.
8377   SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
8378   return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
8379 }
8380 
8381 // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
8382 // smaller than XLenVT.
8383 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
8384                                         const RISCVSubtarget &Subtarget) {
8385   assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
8386 
8387   SDValue Src = N->getOperand(0);
8388   if (Src.getOpcode() != ISD::BSWAP)
8389     return SDValue();
8390 
8391   EVT VT = N->getValueType(0);
8392   if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
8393       !isPowerOf2_32(VT.getSizeInBits()))
8394     return SDValue();
8395 
8396   SDLoc DL(N);
8397   return DAG.getNode(RISCVISD::GREV, DL, VT, Src.getOperand(0),
8398                      DAG.getConstant(7, DL, VT));
8399 }
8400 
8401 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
8402                                                DAGCombinerInfo &DCI) const {
8403   SelectionDAG &DAG = DCI.DAG;
8404 
8405   // Helper to call SimplifyDemandedBits on an operand of N where only some low
8406   // bits are demanded. N will be added to the Worklist if it was not deleted.
8407   // Caller should return SDValue(N, 0) if this returns true.
8408   auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
8409     SDValue Op = N->getOperand(OpNo);
8410     APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
8411     if (!SimplifyDemandedBits(Op, Mask, DCI))
8412       return false;
8413 
8414     if (N->getOpcode() != ISD::DELETED_NODE)
8415       DCI.AddToWorklist(N);
8416     return true;
8417   };
8418 
8419   switch (N->getOpcode()) {
8420   default:
8421     break;
8422   case RISCVISD::SplitF64: {
8423     SDValue Op0 = N->getOperand(0);
8424     // If the input to SplitF64 is just BuildPairF64 then the operation is
8425     // redundant. Instead, use BuildPairF64's operands directly.
8426     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
8427       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
8428 
8429     if (Op0->isUndef()) {
8430       SDValue Lo = DAG.getUNDEF(MVT::i32);
8431       SDValue Hi = DAG.getUNDEF(MVT::i32);
8432       return DCI.CombineTo(N, Lo, Hi);
8433     }
8434 
8435     SDLoc DL(N);
8436 
8437     // It's cheaper to materialise two 32-bit integers than to load a double
8438     // from the constant pool and transfer it to integer registers through the
8439     // stack.
8440     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
8441       APInt V = C->getValueAPF().bitcastToAPInt();
8442       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
8443       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
8444       return DCI.CombineTo(N, Lo, Hi);
8445     }
8446 
8447     // This is a target-specific version of a DAGCombine performed in
8448     // DAGCombiner::visitBITCAST. It performs the equivalent of:
8449     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
8450     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
8451     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
8452         !Op0.getNode()->hasOneUse())
8453       break;
8454     SDValue NewSplitF64 =
8455         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
8456                     Op0.getOperand(0));
8457     SDValue Lo = NewSplitF64.getValue(0);
8458     SDValue Hi = NewSplitF64.getValue(1);
8459     APInt SignBit = APInt::getSignMask(32);
8460     if (Op0.getOpcode() == ISD::FNEG) {
8461       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
8462                                   DAG.getConstant(SignBit, DL, MVT::i32));
8463       return DCI.CombineTo(N, Lo, NewHi);
8464     }
8465     assert(Op0.getOpcode() == ISD::FABS);
8466     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
8467                                 DAG.getConstant(~SignBit, DL, MVT::i32));
8468     return DCI.CombineTo(N, Lo, NewHi);
8469   }
8470   case RISCVISD::SLLW:
8471   case RISCVISD::SRAW:
8472   case RISCVISD::SRLW: {
8473     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
8474     if (SimplifyDemandedLowBitsHelper(0, 32) ||
8475         SimplifyDemandedLowBitsHelper(1, 5))
8476       return SDValue(N, 0);
8477 
8478     break;
8479   }
8480   case ISD::ROTR:
8481   case ISD::ROTL:
8482   case RISCVISD::RORW:
8483   case RISCVISD::ROLW: {
8484     if (N->getOpcode() == RISCVISD::RORW || N->getOpcode() == RISCVISD::ROLW) {
8485       // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
8486       if (SimplifyDemandedLowBitsHelper(0, 32) ||
8487           SimplifyDemandedLowBitsHelper(1, 5))
8488         return SDValue(N, 0);
8489     }
8490 
8491     return combineROTR_ROTL_RORW_ROLW(N, DAG, Subtarget);
8492   }
8493   case RISCVISD::CLZW:
8494   case RISCVISD::CTZW: {
8495     // Only the lower 32 bits of the first operand are read
8496     if (SimplifyDemandedLowBitsHelper(0, 32))
8497       return SDValue(N, 0);
8498     break;
8499   }
8500   case RISCVISD::GREV:
8501   case RISCVISD::GORC: {
8502     // Only the lower log2(Bitwidth) bits of the the shift amount are read.
8503     unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
8504     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
8505     if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth)))
8506       return SDValue(N, 0);
8507 
8508     return combineGREVI_GORCI(N, DAG);
8509   }
8510   case RISCVISD::GREVW:
8511   case RISCVISD::GORCW: {
8512     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
8513     if (SimplifyDemandedLowBitsHelper(0, 32) ||
8514         SimplifyDemandedLowBitsHelper(1, 5))
8515       return SDValue(N, 0);
8516 
8517     break;
8518   }
8519   case RISCVISD::SHFL:
8520   case RISCVISD::UNSHFL: {
8521     // Only the lower log2(Bitwidth)-1 bits of the the shift amount are read.
8522     unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
8523     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
8524     if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) - 1))
8525       return SDValue(N, 0);
8526 
8527     break;
8528   }
8529   case RISCVISD::SHFLW:
8530   case RISCVISD::UNSHFLW: {
8531     // Only the lower 32 bits of LHS and lower 4 bits of RHS are read.
8532     if (SimplifyDemandedLowBitsHelper(0, 32) ||
8533         SimplifyDemandedLowBitsHelper(1, 4))
8534       return SDValue(N, 0);
8535 
8536     break;
8537   }
8538   case RISCVISD::BCOMPRESSW:
8539   case RISCVISD::BDECOMPRESSW: {
8540     // Only the lower 32 bits of LHS and RHS are read.
8541     if (SimplifyDemandedLowBitsHelper(0, 32) ||
8542         SimplifyDemandedLowBitsHelper(1, 32))
8543       return SDValue(N, 0);
8544 
8545     break;
8546   }
8547   case RISCVISD::FSR:
8548   case RISCVISD::FSL:
8549   case RISCVISD::FSRW:
8550   case RISCVISD::FSLW: {
8551     bool IsWInstruction =
8552         N->getOpcode() == RISCVISD::FSRW || N->getOpcode() == RISCVISD::FSLW;
8553     unsigned BitWidth =
8554         IsWInstruction ? 32 : N->getSimpleValueType(0).getSizeInBits();
8555     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
    // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
8557     if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) + 1))
8558       return SDValue(N, 0);
8559 
8560     break;
8561   }
8562   case RISCVISD::FMV_X_ANYEXTH:
8563   case RISCVISD::FMV_X_ANYEXTW_RV64: {
8564     SDLoc DL(N);
8565     SDValue Op0 = N->getOperand(0);
8566     MVT VT = N->getSimpleValueType(0);
8567     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
8568     // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
8569     // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
8570     if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
8571          Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
8572         (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
8573          Op0->getOpcode() == RISCVISD::FMV_H_X)) {
8574       assert(Op0.getOperand(0).getValueType() == VT &&
8575              "Unexpected value type!");
8576       return Op0.getOperand(0);
8577     }
8578 
8579     // This is a target-specific version of a DAGCombine performed in
8580     // DAGCombiner::visitBITCAST. It performs the equivalent of:
8581     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
8582     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
8583     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
8584         !Op0.getNode()->hasOneUse())
8585       break;
8586     SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
8587     unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
8588     APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
8589     if (Op0.getOpcode() == ISD::FNEG)
8590       return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
8591                          DAG.getConstant(SignBit, DL, VT));
8592 
8593     assert(Op0.getOpcode() == ISD::FABS);
8594     return DAG.getNode(ISD::AND, DL, VT, NewFMV,
8595                        DAG.getConstant(~SignBit, DL, VT));
8596   }
8597   case ISD::ADD:
8598     return performADDCombine(N, DAG, Subtarget);
8599   case ISD::SUB:
8600     return performSUBCombine(N, DAG);
8601   case ISD::AND:
8602     return performANDCombine(N, DAG, Subtarget);
8603   case ISD::OR:
8604     return performORCombine(N, DAG, Subtarget);
8605   case ISD::XOR:
8606     return performXORCombine(N, DAG);
8607   case ISD::FADD:
8608   case ISD::UMAX:
8609   case ISD::UMIN:
8610   case ISD::SMAX:
8611   case ISD::SMIN:
8612   case ISD::FMAXNUM:
8613   case ISD::FMINNUM:
8614     return combineBinOpToReduce(N, DAG);
8615   case ISD::SIGN_EXTEND_INREG:
8616     return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
8617   case ISD::ZERO_EXTEND:
8618     // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
8619     // type legalization. This is safe because fp_to_uint produces poison if
8620     // it overflows.
8621     if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
8622       SDValue Src = N->getOperand(0);
8623       if (Src.getOpcode() == ISD::FP_TO_UINT &&
8624           isTypeLegal(Src.getOperand(0).getValueType()))
8625         return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
8626                            Src.getOperand(0));
8627       if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
8628           isTypeLegal(Src.getOperand(1).getValueType())) {
8629         SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
8630         SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
8631                                   Src.getOperand(0), Src.getOperand(1));
8632         DCI.CombineTo(N, Res);
8633         DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
8634         DCI.recursivelyDeleteUnusedNodes(Src.getNode());
8635         return SDValue(N, 0); // Return N so it doesn't get rechecked.
8636       }
8637     }
8638     return SDValue();
8639   case RISCVISD::SELECT_CC: {
8640     // Transform
8641     SDValue LHS = N->getOperand(0);
8642     SDValue RHS = N->getOperand(1);
8643     SDValue TrueV = N->getOperand(3);
8644     SDValue FalseV = N->getOperand(4);
8645 
8646     // If the True and False values are the same, we don't need a select_cc.
8647     if (TrueV == FalseV)
8648       return TrueV;
8649 
8650     ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
8651     if (!ISD::isIntEqualitySetCC(CCVal))
8652       break;
8653 
8654     // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
8655     //      (select_cc X, Y, lt, trueV, falseV)
8656     // Sometimes the setcc is introduced after select_cc has been formed.
8657     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
8658         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
8659       // If we're looking for eq 0 instead of ne 0, we need to invert the
8660       // condition.
8661       bool Invert = CCVal == ISD::SETEQ;
8662       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
8663       if (Invert)
8664         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
8665 
8666       SDLoc DL(N);
8667       RHS = LHS.getOperand(1);
8668       LHS = LHS.getOperand(0);
8669       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8670 
8671       SDValue TargetCC = DAG.getCondCode(CCVal);
8672       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
8673                          {LHS, RHS, TargetCC, TrueV, FalseV});
8674     }
8675 
8676     // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
8677     //      (select_cc X, Y, eq/ne, trueV, falseV)
8678     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
8679       return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
8680                          {LHS.getOperand(0), LHS.getOperand(1),
8681                           N->getOperand(2), TrueV, FalseV});
8682     // (select_cc X, 1, setne, trueV, falseV) ->
8683     // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
8684     // This can occur when legalizing some floating point comparisons.
8685     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
8686     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
8687       SDLoc DL(N);
8688       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
8689       SDValue TargetCC = DAG.getCondCode(CCVal);
8690       RHS = DAG.getConstant(0, DL, LHS.getValueType());
8691       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
8692                          {LHS, RHS, TargetCC, TrueV, FalseV});
8693     }
8694 
8695     break;
8696   }
8697   case RISCVISD::BR_CC: {
8698     SDValue LHS = N->getOperand(1);
8699     SDValue RHS = N->getOperand(2);
8700     ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
8701     if (!ISD::isIntEqualitySetCC(CCVal))
8702       break;
8703 
8704     // Fold (br_cc (setlt X, Y), 0, ne, dest) ->
8705     //      (br_cc X, Y, lt, dest)
8706     // Sometimes the setcc is introduced after br_cc has been formed.
8707     if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
8708         LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
8709       // If we're looking for eq 0 instead of ne 0, we need to invert the
8710       // condition.
8711       bool Invert = CCVal == ISD::SETEQ;
8712       CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
8713       if (Invert)
8714         CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
8715 
8716       SDLoc DL(N);
8717       RHS = LHS.getOperand(1);
8718       LHS = LHS.getOperand(0);
8719       translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8720 
8721       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
8722                          N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
8723                          N->getOperand(4));
8724     }
8725 
8726     // Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
    //      (br_cc X, Y, eq/ne, dest)
8728     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
8729       return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
8730                          N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
8731                          N->getOperand(3), N->getOperand(4));
8732 
8733     // (br_cc X, 1, setne, br_cc) ->
8734     // (br_cc X, 0, seteq, br_cc) if we can prove X is 0/1.
8735     // This can occur when legalizing some floating point comparisons.
8736     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
8737     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
8738       SDLoc DL(N);
8739       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
8740       SDValue TargetCC = DAG.getCondCode(CCVal);
8741       RHS = DAG.getConstant(0, DL, LHS.getValueType());
8742       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
8743                          N->getOperand(0), LHS, RHS, TargetCC,
8744                          N->getOperand(4));
8745     }
8746     break;
8747   }
8748   case ISD::BITREVERSE:
8749     return performBITREVERSECombine(N, DAG, Subtarget);
8750   case ISD::FP_TO_SINT:
8751   case ISD::FP_TO_UINT:
8752     return performFP_TO_INTCombine(N, DCI, Subtarget);
8753   case ISD::FP_TO_SINT_SAT:
8754   case ISD::FP_TO_UINT_SAT:
8755     return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
8756   case ISD::FCOPYSIGN: {
8757     EVT VT = N->getValueType(0);
8758     if (!VT.isVector())
8759       break;
8760     // There is a form of VFSGNJ which injects the negated sign of its second
8761     // operand. Try and bubble any FNEG up after the extend/round to produce
8762     // this optimized pattern. Avoid modifying cases where FP_ROUND and
8763     // TRUNC=1.
8764     SDValue In2 = N->getOperand(1);
8765     // Avoid cases where the extend/round has multiple uses, as duplicating
8766     // those is typically more expensive than removing a fneg.
8767     if (!In2.hasOneUse())
8768       break;
8769     if (In2.getOpcode() != ISD::FP_EXTEND &&
8770         (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
8771       break;
8772     In2 = In2.getOperand(0);
8773     if (In2.getOpcode() != ISD::FNEG)
8774       break;
8775     SDLoc DL(N);
8776     SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
8777     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
8778                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
8779   }
8780   case ISD::MGATHER:
8781   case ISD::MSCATTER:
8782   case ISD::VP_GATHER:
8783   case ISD::VP_SCATTER: {
8784     if (!DCI.isBeforeLegalize())
8785       break;
8786     SDValue Index, ScaleOp;
8787     bool IsIndexScaled = false;
8788     bool IsIndexSigned = false;
8789     if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
8790       Index = VPGSN->getIndex();
8791       ScaleOp = VPGSN->getScale();
8792       IsIndexScaled = VPGSN->isIndexScaled();
8793       IsIndexSigned = VPGSN->isIndexSigned();
8794     } else {
8795       const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
8796       Index = MGSN->getIndex();
8797       ScaleOp = MGSN->getScale();
8798       IsIndexScaled = MGSN->isIndexScaled();
8799       IsIndexSigned = MGSN->isIndexSigned();
8800     }
8801     EVT IndexVT = Index.getValueType();
8802     MVT XLenVT = Subtarget.getXLenVT();
8803     // RISCV indexed loads only support the "unsigned unscaled" addressing
8804     // mode, so anything else must be manually legalized.
8805     bool NeedsIdxLegalization =
8806         IsIndexScaled ||
8807         (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
8808     if (!NeedsIdxLegalization)
8809       break;
8810 
8811     SDLoc DL(N);
8812 
8813     // Any index legalization should first promote to XLenVT, so we don't lose
8814     // bits when scaling. This may create an illegal index type so we let
8815     // LLVM's legalization take care of the splitting.
8816     // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
8817     if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
8818       IndexVT = IndexVT.changeVectorElementType(XLenVT);
8819       Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
8820                           DL, IndexVT, Index);
8821     }
8822 
8823     if (IsIndexScaled) {
8824       // Manually scale the indices.
8825       // TODO: Sanitize the scale operand here?
8826       // TODO: For VP nodes, should we use VP_SHL here?
8827       unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
8828       assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
8829       SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
8830       Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
8831       ScaleOp = DAG.getTargetConstant(1, DL, ScaleOp.getValueType());
8832     }
8833 
8834     ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED;
8835     if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
8836       return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
8837                              {VPGN->getChain(), VPGN->getBasePtr(), Index,
8838                               ScaleOp, VPGN->getMask(),
8839                               VPGN->getVectorLength()},
8840                              VPGN->getMemOperand(), NewIndexTy);
8841     if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
8842       return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
8843                               {VPSN->getChain(), VPSN->getValue(),
8844                                VPSN->getBasePtr(), Index, ScaleOp,
8845                                VPSN->getMask(), VPSN->getVectorLength()},
8846                               VPSN->getMemOperand(), NewIndexTy);
8847     if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
8848       return DAG.getMaskedGather(
8849           N->getVTList(), MGN->getMemoryVT(), DL,
8850           {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
8851            MGN->getBasePtr(), Index, ScaleOp},
8852           MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
8853     const auto *MSN = cast<MaskedScatterSDNode>(N);
8854     return DAG.getMaskedScatter(
8855         N->getVTList(), MSN->getMemoryVT(), DL,
8856         {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
8857          Index, ScaleOp},
8858         MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
8859   }
8860   case RISCVISD::SRA_VL:
8861   case RISCVISD::SRL_VL:
8862   case RISCVISD::SHL_VL: {
8863     SDValue ShAmt = N->getOperand(1);
8864     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
8865       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
8866       SDLoc DL(N);
8867       SDValue VL = N->getOperand(3);
8868       EVT VT = N->getValueType(0);
8869       ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8870                           ShAmt.getOperand(1), VL);
8871       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
8872                          N->getOperand(2), N->getOperand(3));
8873     }
8874     break;
8875   }
8876   case ISD::SRA:
8877   case ISD::SRL:
8878   case ISD::SHL: {
8879     SDValue ShAmt = N->getOperand(1);
8880     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
8881       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
8882       SDLoc DL(N);
8883       EVT VT = N->getValueType(0);
8884       ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8885                           ShAmt.getOperand(1),
8886                           DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
8887       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
8888     }
8889     break;
8890   }
8891   case RISCVISD::ADD_VL:
8892     if (SDValue V = combineADDSUB_VLToVWADDSUB_VL(N, DAG, /*Commute*/ false))
8893       return V;
8894     return combineADDSUB_VLToVWADDSUB_VL(N, DAG, /*Commute*/ true);
8895   case RISCVISD::SUB_VL:
8896     return combineADDSUB_VLToVWADDSUB_VL(N, DAG);
8897   case RISCVISD::VWADD_W_VL:
8898   case RISCVISD::VWADDU_W_VL:
8899   case RISCVISD::VWSUB_W_VL:
8900   case RISCVISD::VWSUBU_W_VL:
8901     return combineVWADD_W_VL_VWSUB_W_VL(N, DAG);
8902   case RISCVISD::MUL_VL:
8903     if (SDValue V = combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ false))
8904       return V;
8905     // Mul is commutative.
8906     return combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ true);
8907   case ISD::STORE: {
8908     auto *Store = cast<StoreSDNode>(N);
8909     SDValue Val = Store->getValue();
8910     // Combine store of vmv.x.s to vse with VL of 1.
8911     // FIXME: Support FP.
8912     if (Val.getOpcode() == RISCVISD::VMV_X_S) {
8913       SDValue Src = Val.getOperand(0);
8914       EVT VecVT = Src.getValueType();
8915       EVT MemVT = Store->getMemoryVT();
8916       // The memory VT and the element type must match.
8917       if (VecVT.getVectorElementType() == MemVT) {
8918         SDLoc DL(N);
8919         MVT MaskVT = getMaskTypeFor(VecVT);
8920         return DAG.getStoreVP(
8921             Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
8922             DAG.getConstant(1, DL, MaskVT),
8923             DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
8924             Store->getMemOperand(), Store->getAddressingMode(),
8925             Store->isTruncatingStore(), /*IsCompress*/ false);
8926       }
8927     }
8928 
8929     break;
8930   }
8931   case ISD::SPLAT_VECTOR: {
8932     EVT VT = N->getValueType(0);
8933     // Only perform this combine on legal MVT types.
8934     if (!isTypeLegal(VT))
8935       break;
8936     if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
8937                                          DAG, Subtarget))
8938       return Gather;
8939     break;
8940   }
8941   case RISCVISD::VMV_V_X_VL: {
8942     // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
8943     // scalar input.
8944     unsigned ScalarSize = N->getOperand(1).getValueSizeInBits();
8945     unsigned EltWidth = N->getValueType(0).getScalarSizeInBits();
8946     if (ScalarSize > EltWidth && N->getOperand(0).isUndef())
8947       if (SimplifyDemandedLowBitsHelper(1, EltWidth))
8948         return SDValue(N, 0);
8949 
8950     break;
8951   }
8952   case ISD::INTRINSIC_WO_CHAIN: {
8953     unsigned IntNo = N->getConstantOperandVal(0);
8954     switch (IntNo) {
8955       // By default we do not combine any intrinsic.
8956     default:
8957       return SDValue();
8958     case Intrinsic::riscv_vcpop:
8959     case Intrinsic::riscv_vcpop_mask:
8960     case Intrinsic::riscv_vfirst:
8961     case Intrinsic::riscv_vfirst_mask: {
8962       SDValue VL = N->getOperand(2);
8963       if (IntNo == Intrinsic::riscv_vcpop_mask ||
8964           IntNo == Intrinsic::riscv_vfirst_mask)
8965         VL = N->getOperand(3);
8966       if (!isNullConstant(VL))
8967         return SDValue();
8968       // If VL is 0, vcpop -> li 0, vfirst -> li -1.
8969       SDLoc DL(N);
8970       EVT VT = N->getValueType(0);
8971       if (IntNo == Intrinsic::riscv_vfirst ||
8972           IntNo == Intrinsic::riscv_vfirst_mask)
8973         return DAG.getConstant(-1, DL, VT);
8974       return DAG.getConstant(0, DL, VT);
8975     }
8976     }
8977   }
8978   }
8979 
8980   return SDValue();
8981 }
8982 
8983 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
8984     const SDNode *N, CombineLevel Level) const {
8985   // The following folds are only desirable if `(OP _, c1 << c2)` can be
8986   // materialised in fewer instructions than `(OP _, c1)`:
8987   //
8988   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8989   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8990   SDValue N0 = N->getOperand(0);
8991   EVT Ty = N0.getValueType();
8992   if (Ty.isScalarInteger() &&
8993       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
8994     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
8995     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
8996     if (C1 && C2) {
8997       const APInt &C1Int = C1->getAPIntValue();
8998       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
8999 
9000       // We can materialise `c1 << c2` into an add immediate, so it's "free",
9001       // and the combine should happen, to potentially allow further combines
9002       // later.
9003       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
9004           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
9005         return true;
9006 
9007       // We can materialise `c1` in an add immediate, so it's "free", and the
9008       // combine should be prevented.
9009       if (C1Int.getMinSignedBits() <= 64 &&
9010           isLegalAddImmediate(C1Int.getSExtValue()))
9011         return false;
9012 
9013       // Neither constant will fit into an immediate, so find materialisation
9014       // costs.
9015       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
9016                                               Subtarget.getFeatureBits(),
9017                                               /*CompressionCost*/true);
9018       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
9019           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
9020           /*CompressionCost*/true);
9021 
9022       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
9023       // combine should be prevented.
9024       if (C1Cost < ShiftedC1Cost)
9025         return false;
9026     }
9027   }
9028   return true;
9029 }
9030 
// Try to replace the constant operand of an AND with a different constant
// that is cheaper to materialize on RISCV, while still agreeing with the
// original mask on every demanded bit. Returns true if a replacement was
// made (or if the existing mask is already the chosen one).
bool RISCVTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Delay this optimization as late as possible.
  if (!TLO.LegalOps)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector())
    return false;

  // Only handle AND for now.
  if (Op.getOpcode() != ISD::AND)
    return false;

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  const APInt &Mask = C->getAPIntValue();

  // Clear all non-demanded bits initially.
  APInt ShrunkMask = Mask & DemandedBits;

  // Try to make a smaller immediate by setting undemanded bits.

  APInt ExpandedMask = Mask | ~DemandedBits;

  // A candidate is usable iff it lies between the fully-shrunk and
  // fully-expanded masks; any such mask matches Mask on all demanded bits.
  auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
    return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
  };
  // Rewrite the AND to use NewMask. A no-op (returns true without creating
  // nodes) when the constant is already NewMask.
  auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool {
    if (NewMask == Mask)
      return true;
    SDLoc DL(Op);
    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
    return TLO.CombineTo(Op, NewOp);
  };

  // If the shrunk mask fits in sign extended 12 bits, let the target
  // independent code apply it.
  if (ShrunkMask.isSignedIntN(12))
    return false;

  // Preserve (and X, 0xffff) when zext.h is supported.
  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
    if (IsLegalMask(NewMask))
      return UseMask(NewMask);
  }

  // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
  if (VT == MVT::i64) {
    APInt NewMask = APInt(64, 0xffffffff);
    if (IsLegalMask(NewMask))
      return UseMask(NewMask);
  }

  // For the remaining optimizations, we need to be able to make a negative
  // number through a combination of mask and undemanded bits.
  if (!ExpandedMask.isNegative())
    return false;

  // What is the fewest number of bits we need to represent the negative number.
  unsigned MinSignedBits = ExpandedMask.getMinSignedBits();

  // Try to make a 12 bit negative immediate. If that fails try to make a 32
  // bit negative immediate unless the shrunk immediate already fits in 32 bits.
  APInt NewMask = ShrunkMask;
  if (MinSignedBits <= 12)
    NewMask.setBitsFrom(11);
  else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
    NewMask.setBitsFrom(31);
  else
    return false;

  // Check that our new mask is a subset of the demanded mask.
  assert(IsLegalMask(NewMask));
  return UseMask(NewMask);
}
9112 
// Evaluate a GREV (generalized bit-reverse) or GORC (generalized bit-or)
// operation on a constant. Each bit of ShAmt selects one butterfly stage;
// stage k exchanges adjacent blocks of 2^k bits. For GORC the exchanged
// value is OR'd with the input instead of replacing it.
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
  // Butterfly masks selecting the low half of each 2^(k+1)-bit group.
  static const uint64_t GREVMasks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};

  for (unsigned Stage = 0; Stage != 6; ++Stage) {
    unsigned Shift = 1u << Stage;
    if ((ShAmt & Shift) == 0)
      continue;
    uint64_t Mask = GREVMasks[Stage];
    uint64_t Swapped = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
    x = IsGORC ? (x | Swapped) : Swapped;
  }

  return x;
}
9131 
// Compute known-zero/known-one bits for RISCV-specific DAG nodes so that
// generic DAG combines can reason about their results.
void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                        KnownBits &Known,
                                                        const APInt &DemandedElts,
                                                        const SelectionDAG &DAG,
                                                        unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  Known.resetAll();
  switch (Opc) {
  default: break;
  case RISCVISD::SELECT_CC: {
    // The result is either the true value (operand 3) or the false value
    // (operand 4), so a bit is known only if it agrees in both.
    Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  }
  case RISCVISD::REMUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::DIVUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::CTZW: {
    // CTZW counts trailing zeros of the low 32 bits; the result needs at
    // most Log2_32(PossibleTZ) + 1 bits, so everything above that is zero.
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
    unsigned LowBits = Log2_32(PossibleTZ) + 1;
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::CLZW: {
    // CLZW counts leading zeros of the low 32 bits; same bound as CTZW.
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
    unsigned LowBits = Log2_32(PossibleLZ) + 1;
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::GREV:
  case RISCVISD::GORC: {
    // Only handled when the control word is constant: apply the same bit
    // permutation directly to the known bits of the input.
    if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
      unsigned ShAmt = C->getZExtValue() & (Known.getBitWidth() - 1);
      bool IsGORC = Op.getOpcode() == RISCVISD::GORC;
      // To compute zeros, we need to invert the value and invert it back after.
      Known.Zero =
          ~computeGREVOrGORC(~Known.Zero.getZExtValue(), ShAmt, IsGORC);
      Known.One = computeGREVOrGORC(Known.One.getZExtValue(), ShAmt, IsGORC);
    }
    break;
  }
  case RISCVISD::READ_VLENB: {
    // If we know the minimum VLen from Zvl extensions, we can use that to
    // determine the trailing zeros of VLENB.
    // FIXME: Limit to 128 bit vectors until we have more testing.
    unsigned MinVLenB = std::min(128U, Subtarget.getMinVLen()) / 8;
    if (MinVLenB > 0)
      Known.Zero.setLowBits(Log2_32(MinVLenB));
    // We assume VLENB is no more than 65536 / 8 bytes.
    Known.Zero.setBitsFrom(14);
    break;
  }
  case ISD::INTRINSIC_W_CHAIN:
  case ISD::INTRINSIC_WO_CHAIN: {
    // For chained intrinsics the intrinsic ID is operand 1 (operand 0 is the
    // chain); for unchained ones it is operand 0.
    unsigned IntNo =
        Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
    switch (IntNo) {
    default:
      // We can't do anything for most intrinsics.
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
    case Intrinsic::riscv_vsetvli_opt:
    case Intrinsic::riscv_vsetvlimax_opt:
      // Assume that VL output is positive and would fit in an int32_t.
      // TODO: VLEN might be capped at 16 bits in a future V spec update.
      if (BitWidth >= 32)
        Known.Zero.setBitsFrom(31);
      break;
    }
    break;
  }
  }
}
9240 
// Compute a lower bound on the number of known sign bits produced by a
// RISCV-specific target node. Returns 1 (nothing known) for unhandled
// opcodes.
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SELECT_CC: {
    // The select produces one of operands 3 and 4, so the result has at
    // least as many sign bits as the weaker of the two.
    unsigned Tmp =
        DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
    if (Tmp == 1) return 1;  // Early out.
    unsigned Tmp2 =
        DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
    return std::min(Tmp, Tmp2);
  }
  // All of these *W nodes produce a 32-bit result sign-extended into an i64,
  // so bits 63:31 are all copies of the sign bit (at least 33 sign bits).
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW:
  case RISCVISD::GREVW:
  case RISCVISD::GORCW:
  case RISCVISD::FSLW:
  case RISCVISD::FSRW:
  case RISCVISD::SHFLW:
  case RISCVISD::UNSHFLW:
  case RISCVISD::BCOMPRESSW:
  case RISCVISD::BDECOMPRESSW:
  case RISCVISD::BFPW:
  case RISCVISD::FCVT_W_RV64:
  case RISCVISD::FCVT_WU_RV64:
  case RISCVISD::STRICT_FCVT_W_RV64:
  case RISCVISD::STRICT_FCVT_WU_RV64:
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
    return 33;
  case RISCVISD::SHFL:
  case RISCVISD::UNSHFL: {
    // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word
    // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
    // will stay within the upper 32 bits. If there were more than 32 sign bits
    // before there will be at least 33 sign bits after.
    if (Op.getValueType() == MVT::i64 &&
        isa<ConstantSDNode>(Op.getOperand(1)) &&
        (Op.getConstantOperandVal(1) & 0x10) == 0) {
      unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
      if (Tmp > 32)
        return 33;
    }
    break;
  }
  case RISCVISD::VMV_X_S: {
    // The number of sign bits of the scalar result is computed by obtaining the
    // element type of the input vector operand, subtracting its width from the
    // XLEN, and then adding one (sign bit within the element type). If the
    // element type is wider than XLen, the least-significant XLEN bits are
    // taken.
    unsigned XLen = Subtarget.getXLen();
    unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
    if (EltBits <= XLen)
      return XLen - EltBits + 1;
    break;
  }
  }

  // Unknown opcode: we can only guarantee the sign bit itself.
  return 1;
}
9311 
// Expand the ReadCycleWide pseudo into a retry loop that reads the 64-bit
// cycle CSR pair (CYCLE/CYCLEH) atomically on a 32-bit target. Returns the
// block that execution continues in after the expansion.
static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  // ...
  // read:
  // rdcycleh x3 # load high word of cycle
  // rdcycle  x2 # load low word of cycle
  // rdcycleh x4 # load high word of cycle
  // bne x3, x4, read # check if high word reads match, otherwise try again
  // ...

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  // LoopMBB holds the CSR reads and branches back to itself on a mismatch.
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  // CSRRS with rs1=x0 reads a CSR without modifying it.
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  // Retry if the two high-word reads disagree (the counter wrapped).
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}
9373 
// Expand SplitF64Pseudo: move an FPR64 value into two GPRs by spilling it to
// a stack slot and reloading the two 32-bit halves with LW. Offset 0 feeds
// the low word and offset 4 the high word (RISC-V is little-endian).
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  // A frame index shared by all F64 moves in this function.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
9406 
// Expand BuildPairF64Pseudo: combine two GPRs into an FPR64 by storing the
// halves to a stack slot with SW (low word at offset 0, high word at offset
// 4) and reloading the whole slot into the destination FPR64. This is the
// inverse of emitSplitF64Pseudo and shares the same frame index.
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
9441 
9442 static bool isSelectPseudo(MachineInstr &MI) {
9443   switch (MI.getOpcode()) {
9444   default:
9445     return false;
9446   case RISCV::Select_GPR_Using_CC_GPR:
9447   case RISCV::Select_FPR16_Using_CC_GPR:
9448   case RISCV::Select_FPR32_Using_CC_GPR:
9449   case RISCV::Select_FPR64_Using_CC_GPR:
9450     return true;
9451   }
9452 }
9453 
// Expand a quiet FP compare pseudo. The hardware compare (RelOpcode) may
// raise the invalid exception on quiet NaNs, so we save FFLAGS beforehand,
// restore it afterwards, and then issue a dummy FEQ (EqOpcode) whose result
// is discarded so that signaling NaNs still raise invalid as required.
static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
                                        unsigned RelOpcode, unsigned EqOpcode,
                                        const RISCVSubtarget &Subtarget) {
  DebugLoc DL = MI.getDebugLoc();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();

  // Save the current FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);

  // The actual relational compare producing the result.
  auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
                 .addReg(Src1Reg)
                 .addReg(Src2Reg);
  // Propagate the no-FP-exception flag from the pseudo if present.
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Restore the FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
      .addReg(SavedFFlags, RegState::Kill);

  // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
  auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
                  .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
                  .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  return BB;
}
9489 
// Expand one or more consecutive Select_* pseudos into a conditional-branch
// triangle with PHIs in the tail block. Returns the tail block, which holds
// everything that followed the select sequence.
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB,
                                           const RISCVSubtarget &Subtarget) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern.  The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  // Scan forward from MI to find the longest run of select pseudos (plus
  // interleaved safe instructions) that can share this triangle.
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    if (isSelectPseudo(*SequenceMBBI)) {
      // Stop if the condition differs or a TrueV/FalseV operand depends on
      // an earlier select's result.
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      // Non-select instructions may only be skipped over if they are safe to
      // hoist above the branch: no side effects, no memory access, and no
      // use of a select result.
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  BuildMI(HeadMBB, DL, TII.getBrCond(CC))
    .addReg(LHS)
    .addReg(RHS)
    .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // We introduced PHIs, so the function is no longer PHI-free.
  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}
9611 
9612 MachineBasicBlock *
9613 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
9614                                                  MachineBasicBlock *BB) const {
9615   switch (MI.getOpcode()) {
9616   default:
9617     llvm_unreachable("Unexpected instr type to insert");
9618   case RISCV::ReadCycleWide:
9619     assert(!Subtarget.is64Bit() &&
9620            "ReadCycleWrite is only to be used on riscv32");
9621     return emitReadCycleWidePseudo(MI, BB);
9622   case RISCV::Select_GPR_Using_CC_GPR:
9623   case RISCV::Select_FPR16_Using_CC_GPR:
9624   case RISCV::Select_FPR32_Using_CC_GPR:
9625   case RISCV::Select_FPR64_Using_CC_GPR:
9626     return emitSelectPseudo(MI, BB, Subtarget);
9627   case RISCV::BuildPairF64Pseudo:
9628     return emitBuildPairF64Pseudo(MI, BB);
9629   case RISCV::SplitF64Pseudo:
9630     return emitSplitF64Pseudo(MI, BB);
9631   case RISCV::PseudoQuietFLE_H:
9632     return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
9633   case RISCV::PseudoQuietFLT_H:
9634     return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
9635   case RISCV::PseudoQuietFLE_S:
9636     return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
9637   case RISCV::PseudoQuietFLT_S:
9638     return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
9639   case RISCV::PseudoQuietFLE_D:
9640     return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
9641   case RISCV::PseudoQuietFLT_D:
9642     return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
9643   }
9644 }
9645 
9646 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
9647                                                         SDNode *Node) const {
9648   // Add FRM dependency to any instructions with dynamic rounding mode.
9649   unsigned Opc = MI.getOpcode();
9650   auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm);
9651   if (Idx < 0)
9652     return;
9653   if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
9654     return;
9655   // If the instruction already reads FRM, don't add another read.
9656   if (MI.readsRegister(RISCV::FRM))
9657     return;
9658   MI.addOperand(
9659       MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
9660 }
9661 
9662 // Calling Convention Implementation.
9663 // The expectations for frontend ABI lowering vary from target to target.
9664 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
9665 // details, but this is a longer term goal. For now, we simply try to keep the
9666 // role of the frontend as simple and well-defined as possible. The rules can
9667 // be summarised as:
9668 // * Never split up large scalar arguments. We handle them here.
9669 // * If a hardfloat calling convention is being used, and the struct may be
9670 // passed in a pair of registers (fp+fp, int+fp), and both registers are
9671 // available, then pass as two separate arguments. If either the GPRs or FPRs
9672 // are exhausted, then pass according to the rule below.
9673 // * If a struct could never be passed in registers or directly in a stack
9674 // slot (as it is larger than 2*XLEN and the floating point rules don't
9675 // apply), then pass it using a pointer with the byval attribute.
9676 // * If a struct is less than 2*XLEN, then coerce to either a two-element
9677 // word-sized array or a 2*XLEN scalar (depending on alignment).
9678 // * The frontend can determine whether a struct is returned by reference or
9679 // not based on its size and fields. If it will be returned by reference, the
9680 // frontend must modify the prototype so a pointer with the sret annotation is
9681 // passed as the first argument. This is not necessary for large scalar
9682 // returns.
9683 // * Struct return values and varargs should be coerced to structs containing
9684 // register-size fields in the same situations they would be for fixed
9685 // arguments.
9686 
// Integer argument registers: x10-x17 (a0-a7).
static const MCPhysReg ArgGPRs[] = {
  RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
  RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
// Floating-point argument registers fa0-fa7 viewed at f16, f32 and f64
// widths. The three lists alias the same physical registers.
static const MCPhysReg ArgFPR16s[] = {
  RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
  RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
};
static const MCPhysReg ArgFPR32s[] = {
  RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
  RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
};
static const MCPhysReg ArgFPR64s[] = {
  RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
  RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};
// This is an interim calling convention and it may be changed in the future.
// Vector argument registers at LMUL=1, 2, 4 and 8. Higher-LMUL lists must
// start at registers aligned to the group size.
static const MCPhysReg ArgVRs[] = {
    RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
    RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
    RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
                                     RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
                                     RISCV::V20M2, RISCV::V22M2};
static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
                                     RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
9714 
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary. The first half is described by VA1 /
// ArgFlags1 and the second by ValNo2 / ValVT2 / LocVT2 / ArgFlags2. Always
// returns false (success) per the CCAssignFn convention.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2) {
  unsigned XLenInBytes = XLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    // The first half takes the larger of XLEN alignment and the original
    // argument alignment; the second half only needs XLEN alignment.
    Align StackAlign =
        std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }

  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
  }

  return false;
}
9753 
9754 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
9755                                Optional<unsigned> FirstMaskArgument,
9756                                CCState &State, const RISCVTargetLowering &TLI) {
9757   const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
9758   if (RC == &RISCV::VRRegClass) {
9759     // Assign the first mask argument to V0.
9760     // This is an interim calling convention and it may be changed in the
9761     // future.
9762     if (FirstMaskArgument.hasValue() && ValNo == FirstMaskArgument.getValue())
9763       return State.AllocateReg(RISCV::V0);
9764     return State.AllocateReg(ArgVRs);
9765   }
9766   if (RC == &RISCV::VRM2RegClass)
9767     return State.AllocateReg(ArgVRM2s);
9768   if (RC == &RISCV::VRM4RegClass)
9769     return State.AllocateReg(ArgVRM4s);
9770   if (RC == &RISCV::VRM8RegClass)
9771     return State.AllocateReg(ArgVRM8s);
9772   llvm_unreachable("Unhandled register class for ValueType");
9773 }
9774 
// Implements the RISC-V calling convention. Returns true upon failure.
// ValNo/ValVT/LocVT/LocInfo/ArgFlags describe the value being assigned and
// State accumulates the resulting locations. IsFixed is false for variadic
// arguments, IsRet is true for return values, OrigTy is the original IR type
// of the argument, and FirstMaskArgument (if present) is the index of the
// first vector mask argument, which is assigned to V0.
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     Optional<unsigned> FirstMaskArgument) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split in to more than two values can't be returned
  // directly. Vectors are returned via the available vector registers.
  if (!LocVT.isVector() && IsRet && ValNo > 1)
    return true;

  // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F16/F32 argument registers are available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF16_F32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF16_F32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // FPR16, FPR32, and FPR64 alias each other.
  // Checking any one of the lists for exhaustion covers all of them.
  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
    UseGPRForF16_F32 = true;
    UseGPRForF64 = true;
  }

  // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
  // similar local variables rather than directly checking against the target
  // ABI.

  // FP values passed in GPRs are bit-converted to integer of the same width.
  if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      // No GPRs left: the whole f64 goes on the stack.
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    // Reserve the second GPR (or a stack word if none remain) for the high
    // half; only the low half's location is recorded here.
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Fixed-length vectors are located in the corresponding scalable-vector
  // container types.
  if (ValVT.isFixedLengthVector())
    LocVT = TLI.getContainerForFixedLengthVector(LocVT);

  // Split arguments might be passed indirectly, so keep track of the pending
  // values. Split vectors are passed via a mix of registers and indirectly, so
  // treat them as we would any other argument.
  if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
      PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  unsigned StoreSizeBytes = XLen / 8;
  Align StackAlign = Align(XLen / 8);

  if (ValVT == MVT::f16 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR16s);
  else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.isVector()) {
    Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
    if (!Reg) {
      // For return values, the vector must be passed fully via registers or
      // via the stack.
      // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
      // but we're using all of them.
      if (IsRet)
        return true;
      // Try using a GPR to pass the address
      if ((Reg = State.AllocateReg(ArgGPRs))) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else if (ValVT.isScalableVector()) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else {
        // Pass fixed-length vectors on the stack.
        LocVT = ValVT;
        StoreSizeBytes = ValVT.getStoreSize();
        // Align vectors to their element sizes, being careful for vXi1
        // vectors.
        StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
      }
    }
  } else {
    Reg = State.AllocateReg(ArgGPRs);
  }

  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    // All parts of the split argument share a single register or stack slot
    // holding the address of the actual value.
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
         "Expected an XLenVT or vector types at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-conversion is
  // needed.
  if (ValVT.isFloatingPoint()) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
9991 
9992 template <typename ArgTy>
9993 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
9994   for (const auto &ArgIdx : enumerate(Args)) {
9995     MVT ArgVT = ArgIdx.value().VT;
9996     if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
9997       return ArgIdx.index();
9998   }
9999   return None;
10000 }
10001 
10002 void RISCVTargetLowering::analyzeInputArgs(
10003     MachineFunction &MF, CCState &CCInfo,
10004     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
10005     RISCVCCAssignFn Fn) const {
10006   unsigned NumArgs = Ins.size();
10007   FunctionType *FType = MF.getFunction().getFunctionType();
10008 
10009   Optional<unsigned> FirstMaskArgument;
10010   if (Subtarget.hasVInstructions())
10011     FirstMaskArgument = preAssignMask(Ins);
10012 
10013   for (unsigned i = 0; i != NumArgs; ++i) {
10014     MVT ArgVT = Ins[i].VT;
10015     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
10016 
10017     Type *ArgTy = nullptr;
10018     if (IsRet)
10019       ArgTy = FType->getReturnType();
10020     else if (Ins[i].isOrigArg())
10021       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
10022 
10023     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
10024     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
10025            ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
10026            FirstMaskArgument)) {
10027       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
10028                         << EVT(ArgVT).getEVTString() << '\n');
10029       llvm_unreachable(nullptr);
10030     }
10031   }
10032 }
10033 
10034 void RISCVTargetLowering::analyzeOutputArgs(
10035     MachineFunction &MF, CCState &CCInfo,
10036     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
10037     CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
10038   unsigned NumArgs = Outs.size();
10039 
10040   Optional<unsigned> FirstMaskArgument;
10041   if (Subtarget.hasVInstructions())
10042     FirstMaskArgument = preAssignMask(Outs);
10043 
10044   for (unsigned i = 0; i != NumArgs; i++) {
10045     MVT ArgVT = Outs[i].VT;
10046     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
10047     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
10048 
10049     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
10050     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
10051            ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
10052            FirstMaskArgument)) {
10053       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
10054                         << EVT(ArgVT).getEVTString() << "\n");
10055       llvm_unreachable(nullptr);
10056     }
10057   }
10058 }
10059 
10060 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
10061 // values.
10062 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
10063                                    const CCValAssign &VA, const SDLoc &DL,
10064                                    const RISCVSubtarget &Subtarget) {
10065   switch (VA.getLocInfo()) {
10066   default:
10067     llvm_unreachable("Unexpected CCValAssign::LocInfo");
10068   case CCValAssign::Full:
10069     if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
10070       Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
10071     break;
10072   case CCValAssign::BCvt:
10073     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
10074       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
10075     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
10076       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
10077     else
10078       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
10079     break;
10080   }
10081   return Val;
10082 }
10083 
10084 // The caller is responsible for loading the full value if the argument is
10085 // passed with CCValAssign::Indirect.
10086 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
10087                                 const CCValAssign &VA, const SDLoc &DL,
10088                                 const RISCVTargetLowering &TLI) {
10089   MachineFunction &MF = DAG.getMachineFunction();
10090   MachineRegisterInfo &RegInfo = MF.getRegInfo();
10091   EVT LocVT = VA.getLocVT();
10092   SDValue Val;
10093   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
10094   Register VReg = RegInfo.createVirtualRegister(RC);
10095   RegInfo.addLiveIn(VA.getLocReg(), VReg);
10096   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
10097 
10098   if (VA.getLocInfo() == CCValAssign::Indirect)
10099     return Val;
10100 
10101   return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
10102 }
10103 
10104 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
10105                                    const CCValAssign &VA, const SDLoc &DL,
10106                                    const RISCVSubtarget &Subtarget) {
10107   EVT LocVT = VA.getLocVT();
10108 
10109   switch (VA.getLocInfo()) {
10110   default:
10111     llvm_unreachable("Unexpected CCValAssign::LocInfo");
10112   case CCValAssign::Full:
10113     if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
10114       Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
10115     break;
10116   case CCValAssign::BCvt:
10117     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
10118       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
10119     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
10120       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
10121     else
10122       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
10123     break;
10124   }
10125   return Val;
10126 }
10127 
10128 // The caller is responsible for loading the full value if the argument is
10129 // passed with CCValAssign::Indirect.
10130 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
10131                                 const CCValAssign &VA, const SDLoc &DL) {
10132   MachineFunction &MF = DAG.getMachineFunction();
10133   MachineFrameInfo &MFI = MF.getFrameInfo();
10134   EVT LocVT = VA.getLocVT();
10135   EVT ValVT = VA.getValVT();
10136   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
10137   if (ValVT.isScalableVector()) {
10138     // When the value is a scalable vector, we save the pointer which points to
10139     // the scalable vector value in the stack. The ValVT will be the pointer
10140     // type, instead of the scalable vector type.
10141     ValVT = LocVT;
10142   }
10143   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
10144                                  /*IsImmutable=*/true);
10145   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
10146   SDValue Val;
10147 
10148   ISD::LoadExtType ExtType;
10149   switch (VA.getLocInfo()) {
10150   default:
10151     llvm_unreachable("Unexpected CCValAssign::LocInfo");
10152   case CCValAssign::Full:
10153   case CCValAssign::Indirect:
10154   case CCValAssign::BCvt:
10155     ExtType = ISD::NON_EXTLOAD;
10156     break;
10157   }
10158   Val = DAG.getExtLoad(
10159       ExtType, DL, LocVT, Chain, FIN,
10160       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
10161   return Val;
10162 }
10163 
10164 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
10165                                        const CCValAssign &VA, const SDLoc &DL) {
10166   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
10167          "Unexpected VA");
10168   MachineFunction &MF = DAG.getMachineFunction();
10169   MachineFrameInfo &MFI = MF.getFrameInfo();
10170   MachineRegisterInfo &RegInfo = MF.getRegInfo();
10171 
10172   if (VA.isMemLoc()) {
10173     // f64 is passed on the stack.
10174     int FI =
10175         MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true);
10176     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
10177     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
10178                        MachinePointerInfo::getFixedStack(MF, FI));
10179   }
10180 
10181   assert(VA.isRegLoc() && "Expected register VA assignment");
10182 
10183   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
10184   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
10185   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
10186   SDValue Hi;
10187   if (VA.getLocReg() == RISCV::X17) {
10188     // Second half of f64 is passed on the stack.
10189     int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true);
10190     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
10191     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
10192                      MachinePointerInfo::getFixedStack(MF, FI));
10193   } else {
10194     // Second half of f64 is passed in another GPR.
10195     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
10196     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
10197     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
10198   }
10199   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
10200 }
10201 
// Calling convention for CallingConv::Fast: like the standard convention but
// with extended register lists (temporaries and more FPRs) so fewer values
// spill to the stack. Returns true if the argument could not be assigned.
// Note that register allocation through `State` is stateful, so the order of
// the checks below determines the assignment order.
// FastCC has less than 1% performance improvement for some particular
// benchmark. But theoretically, it may have benefit for some cases.
static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
                            unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State,
                            bool IsFixed, bool IsRet, Type *OrigTy,
                            const RISCVTargetLowering &TLI,
                            Optional<unsigned> FirstMaskArgument) {

  // X5 and X6 might be used for save-restore libcall.
  static const MCPhysReg GPRList[] = {
      RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
      RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
      RISCV::X29, RISCV::X30, RISCV::X31};

  // Scalar integers: argument registers first, then temporaries.
  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // f16: FP argument registers first, then FP temporaries/saved registers.
  if (LocVT == MVT::f16) {
    static const MCPhysReg FPR16List[] = {
        RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
        RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
        RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
        RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
    if (unsigned Reg = State.AllocateReg(FPR16List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // f32: same register numbering as the f16 list, in the F subregister class.
  if (LocVT == MVT::f32) {
    static const MCPhysReg FPR32List[] = {
        RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
        RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
        RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
        RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // f64: same register numbering, in the D register class.
  if (LocVT == MVT::f64) {
    static const MCPhysReg FPR64List[] = {
        RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
        RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
        RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
        RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // No register available: spill 32-bit values to a 4-byte stack slot...
  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
    unsigned Offset4 = State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
    return false;
  }

  // ...and 64-bit values to an 8-byte slot.
  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
    unsigned Offset5 = State.AllocateStack(8, Align(8));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
    return false;
  }

  // Vectors: try a vector register, then an indirect GPR, then (for
  // fixed-length vectors only) the stack.
  if (LocVT.isVector()) {
    if (unsigned Reg =
            allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
      // Fixed-length vectors are located in the corresponding scalable-vector
      // container types.
      if (ValVT.isFixedLengthVector())
        LocVT = TLI.getContainerForFixedLengthVector(LocVT);
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    } else {
      // Try and pass the address via a "fast" GPR.
      if (unsigned GPRReg = State.AllocateReg(GPRList)) {
        LocInfo = CCValAssign::Indirect;
        LocVT = TLI.getSubtarget().getXLenVT();
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
      } else if (ValVT.isFixedLengthVector()) {
        // Align vectors to their element size (vXi1 degenerates to Align(1)).
        auto StackAlign =
            MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
        unsigned StackOffset =
            State.AllocateStack(ValVT.getStoreSize(), StackAlign);
        State.addLoc(
            CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      } else {
        // Can't pass scalable vectors on the stack.
        return true;
      }
    }

    return false;
  }

  return true; // CC didn't match.
}
10305 
// Calling convention for the Glasgow Haskell Compiler: each STG "virtual
// register" is pinned to a fixed callee-saved machine register, and nothing
// is ever passed on the stack — running out of registers is a fatal error.
static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                         CCValAssign::LocInfo LocInfo,
                         ISD::ArgFlagsTy ArgFlags, CCState &State) {

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
    //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
    static const MCPhysReg GPRList[] = {
        RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
        RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32) {
    // Pass in STG registers: F1, ..., F6
    //                        fs0 ... fs5
    static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
                                          RISCV::F18_F, RISCV::F19_F,
                                          RISCV::F20_F, RISCV::F21_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64) {
    // Pass in STG registers: D1, ..., D6
    //                        fs6 ... fs11
    static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
                                          RISCV::F24_D, RISCV::F25_D,
                                          RISCV::F26_D, RISCV::F27_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  report_fatal_error("No registers left in GHC calling convention");
  // Unreachable: report_fatal_error does not return.
  return true;
}
10349 
// Transform physical registers into virtual registers.
// Lowers the incoming formal arguments of a function: assigns each argument a
// location via the appropriate calling convention, materializes register
// arguments as live-in copies and stack arguments as fixed-object loads, and
// sets up the varargs save area when needed. Returns the (possibly updated)
// chain.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    // GHC's convention pins values to specific FPRs, so F and D are hard
    // requirements.
    if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
        !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
      report_fatal_error(
        "GHC calling convention requires the F and D instruction set extensions");
  }

  // Interrupt handlers cannot take arguments and only accept a known set of
  // "interrupt" attribute values.
  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
        "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
        "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
                     CallConv == CallingConv::Fast ? CC_RISCV_FastCC
                                                   : CC_RISCV);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). Vectors may be partly split to registers and partly to the
      // stack, in which case the base address is partly offset and subsequent
      // stores are relative to that.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      unsigned ArgPartOffset = Ins[i].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Load the remaining parts of the same original argument, each at its
      // part offset from the shared base address.
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        // Scalable-vector parts are at VLEN-dependent offsets.
        if (PartVA.getValVT().isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
      VarArgsSaveSize += XLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
10505 
10506 /// isEligibleForTailCallOptimization - Check whether the call is eligible
10507 /// for tail call optimization.
10508 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
10509 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
10510     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
10511     const SmallVector<CCValAssign, 16> &ArgLocs) const {
10512 
10513   auto &Callee = CLI.Callee;
10514   auto CalleeCC = CLI.CallConv;
10515   auto &Outs = CLI.Outs;
10516   auto &Caller = MF.getFunction();
10517   auto CallerCC = Caller.getCallingConv();
10518 
10519   // Exception-handling functions need a special set of instructions to
10520   // indicate a return to the hardware. Tail-calling another function would
10521   // probably break this.
10522   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
10523   // should be expanded as new function attributes are introduced.
10524   if (Caller.hasFnAttribute("interrupt"))
10525     return false;
10526 
10527   // Do not tail call opt if the stack is used to pass parameters.
10528   if (CCInfo.getNextStackOffset() != 0)
10529     return false;
10530 
10531   // Do not tail call opt if any parameters need to be passed indirectly.
10532   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
10533   // passed indirectly. So the address of the value will be passed in a
10534   // register, or if not available, then the address is put on the stack. In
10535   // order to pass indirectly, space on the stack often needs to be allocated
10536   // in order to store the value. In this case the CCInfo.getNextStackOffset()
10537   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
10538   // are passed CCValAssign::Indirect.
10539   for (auto &VA : ArgLocs)
10540     if (VA.getLocInfo() == CCValAssign::Indirect)
10541       return false;
10542 
10543   // Do not tail call opt if either caller or callee uses struct return
10544   // semantics.
10545   auto IsCallerStructRet = Caller.hasStructRetAttr();
10546   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
10547   if (IsCallerStructRet || IsCalleeStructRet)
10548     return false;
10549 
10550   // Externally-defined functions with weak linkage should not be
10551   // tail-called. The behaviour of branch instructions in this situation (as
10552   // used for tail calls) is implementation-defined, so we cannot rely on the
10553   // linker replacing the tail call with a return.
10554   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
10555     const GlobalValue *GV = G->getGlobal();
10556     if (GV->hasExternalWeakLinkage())
10557       return false;
10558   }
10559 
10560   // The callee has to preserve all registers the caller needs to preserve.
10561   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10562   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
10563   if (CalleeCC != CallerCC) {
10564     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
10565     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
10566       return false;
10567   }
10568 
10569   // Byval parameters hand the function a pointer directly into the stack area
10570   // we want to reuse during a tail call. Working around this *is* possible
10571   // but less efficient and uglier in LowerCall.
10572   for (auto &Arg : Outs)
10573     if (Arg.Flags.isByVal())
10574       return false;
10575 
10576   return true;
10577 }
10578 
10579 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
10580   return DAG.getDataLayout().getPrefTypeAlign(
10581       VT.getTypeForEVT(*DAG.getContext()));
10582 }
10583 
10584 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
10585 // and output parameter nodes.
10586 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
10587                                        SmallVectorImpl<SDValue> &InVals) const {
10588   SelectionDAG &DAG = CLI.DAG;
10589   SDLoc &DL = CLI.DL;
10590   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
10591   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
10592   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
10593   SDValue Chain = CLI.Chain;
10594   SDValue Callee = CLI.Callee;
10595   bool &IsTailCall = CLI.IsTailCall;
10596   CallingConv::ID CallConv = CLI.CallConv;
10597   bool IsVarArg = CLI.IsVarArg;
10598   EVT PtrVT = getPointerTy(DAG.getDataLayout());
10599   MVT XLenVT = Subtarget.getXLenVT();
10600 
10601   MachineFunction &MF = DAG.getMachineFunction();
10602 
10603   // Analyze the operands of the call, assigning locations to each operand.
10604   SmallVector<CCValAssign, 16> ArgLocs;
10605   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
10606 
10607   if (CallConv == CallingConv::GHC)
10608     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
10609   else
10610     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
10611                       CallConv == CallingConv::Fast ? CC_RISCV_FastCC
10612                                                     : CC_RISCV);
10613 
10614   // Check if it's really possible to do a tail call.
10615   if (IsTailCall)
10616     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
10617 
10618   if (IsTailCall)
10619     ++NumTailCalls;
10620   else if (CLI.CB && CLI.CB->isMustTailCall())
10621     report_fatal_error("failed to perform tail call elimination on a call "
10622                        "site marked musttail");
10623 
10624   // Get a count of how many bytes are to be pushed on the stack.
10625   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
10626 
10627   // Create local copies for byval args
10628   SmallVector<SDValue, 8> ByValArgs;
10629   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
10630     ISD::ArgFlagsTy Flags = Outs[i].Flags;
10631     if (!Flags.isByVal())
10632       continue;
10633 
10634     SDValue Arg = OutVals[i];
10635     unsigned Size = Flags.getByValSize();
10636     Align Alignment = Flags.getNonZeroByValAlign();
10637 
10638     int FI =
10639         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
10640     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
10641     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
10642 
10643     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
10644                           /*IsVolatile=*/false,
10645                           /*AlwaysInline=*/false, IsTailCall,
10646                           MachinePointerInfo(), MachinePointerInfo());
10647     ByValArgs.push_back(FIPtr);
10648   }
10649 
10650   if (!IsTailCall)
10651     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
10652 
10653   // Copy argument values to their designated locations.
10654   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
10655   SmallVector<SDValue, 8> MemOpChains;
10656   SDValue StackPtr;
10657   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
10658     CCValAssign &VA = ArgLocs[i];
10659     SDValue ArgValue = OutVals[i];
10660     ISD::ArgFlagsTy Flags = Outs[i].Flags;
10661 
10662     // Handle passing f64 on RV32D with a soft float ABI as a special case.
10663     bool IsF64OnRV32DSoftABI =
10664         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
10665     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
10666       SDValue SplitF64 = DAG.getNode(
10667           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
10668       SDValue Lo = SplitF64.getValue(0);
10669       SDValue Hi = SplitF64.getValue(1);
10670 
10671       Register RegLo = VA.getLocReg();
10672       RegsToPass.push_back(std::make_pair(RegLo, Lo));
10673 
10674       if (RegLo == RISCV::X17) {
10675         // Second half of f64 is passed on the stack.
10676         // Work out the address of the stack slot.
10677         if (!StackPtr.getNode())
10678           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
10679         // Emit the store.
10680         MemOpChains.push_back(
10681             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
10682       } else {
10683         // Second half of f64 is passed in another GPR.
10684         assert(RegLo < RISCV::X31 && "Invalid register pair");
10685         Register RegHigh = RegLo + 1;
10686         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
10687       }
10688       continue;
10689     }
10690 
10691     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
10692     // as any other MemLoc.
10693 
10694     // Promote the value if needed.
10695     // For now, only handle fully promoted and indirect arguments.
10696     if (VA.getLocInfo() == CCValAssign::Indirect) {
10697       // Store the argument in a stack slot and pass its address.
10698       Align StackAlign =
10699           std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
10700                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
10701       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
10702       // If the original argument was split (e.g. i128), we need
10703       // to store the required parts of it here (and pass just one address).
10704       // Vectors may be partly split to registers and partly to the stack, in
10705       // which case the base address is partly offset and subsequent stores are
10706       // relative to that.
10707       unsigned ArgIndex = Outs[i].OrigArgIndex;
10708       unsigned ArgPartOffset = Outs[i].PartOffset;
10709       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
10710       // Calculate the total size to store. We don't have access to what we're
10711       // actually storing other than performing the loop and collecting the
10712       // info.
10713       SmallVector<std::pair<SDValue, SDValue>> Parts;
10714       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
10715         SDValue PartValue = OutVals[i + 1];
10716         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
10717         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
10718         EVT PartVT = PartValue.getValueType();
10719         if (PartVT.isScalableVector())
10720           Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
10721         StoredSize += PartVT.getStoreSize();
10722         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
10723         Parts.push_back(std::make_pair(PartValue, Offset));
10724         ++i;
10725       }
10726       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
10727       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
10728       MemOpChains.push_back(
10729           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
10730                        MachinePointerInfo::getFixedStack(MF, FI)));
10731       for (const auto &Part : Parts) {
10732         SDValue PartValue = Part.first;
10733         SDValue PartOffset = Part.second;
10734         SDValue Address =
10735             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
10736         MemOpChains.push_back(
10737             DAG.getStore(Chain, DL, PartValue, Address,
10738                          MachinePointerInfo::getFixedStack(MF, FI)));
10739       }
10740       ArgValue = SpillSlot;
10741     } else {
10742       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
10743     }
10744 
10745     // Use local copy if it is a byval arg.
10746     if (Flags.isByVal())
10747       ArgValue = ByValArgs[j++];
10748 
10749     if (VA.isRegLoc()) {
10750       // Queue up the argument copies and emit them at the end.
10751       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
10752     } else {
10753       assert(VA.isMemLoc() && "Argument not register or memory");
10754       assert(!IsTailCall && "Tail call not allowed if stack is used "
10755                             "for passing parameters");
10756 
10757       // Work out the address of the stack slot.
10758       if (!StackPtr.getNode())
10759         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
10760       SDValue Address =
10761           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
10762                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
10763 
10764       // Emit the store.
10765       MemOpChains.push_back(
10766           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
10767     }
10768   }
10769 
10770   // Join the stores, which are independent of one another.
10771   if (!MemOpChains.empty())
10772     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
10773 
10774   SDValue Glue;
10775 
10776   // Build a sequence of copy-to-reg nodes, chained and glued together.
10777   for (auto &Reg : RegsToPass) {
10778     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
10779     Glue = Chain.getValue(1);
10780   }
10781 
10782   // Validate that none of the argument registers have been marked as
10783   // reserved, if so report an error. Do the same for the return address if this
10784   // is not a tailcall.
10785   validateCCReservedRegs(RegsToPass, MF);
10786   if (!IsTailCall &&
10787       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
10788     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
10789         MF.getFunction(),
10790         "Return address register required, but has been reserved."});
10791 
10792   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
10793   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
10794   // split it and then direct call can be matched by PseudoCALL.
10795   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
10796     const GlobalValue *GV = S->getGlobal();
10797 
10798     unsigned OpFlags = RISCVII::MO_CALL;
10799     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
10800       OpFlags = RISCVII::MO_PLT;
10801 
10802     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
10803   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
10804     unsigned OpFlags = RISCVII::MO_CALL;
10805 
10806     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
10807                                                  nullptr))
10808       OpFlags = RISCVII::MO_PLT;
10809 
10810     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
10811   }
10812 
10813   // The first call operand is the chain and the second is the target address.
10814   SmallVector<SDValue, 8> Ops;
10815   Ops.push_back(Chain);
10816   Ops.push_back(Callee);
10817 
10818   // Add argument registers to the end of the list so that they are
10819   // known live into the call.
10820   for (auto &Reg : RegsToPass)
10821     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
10822 
10823   if (!IsTailCall) {
10824     // Add a register mask operand representing the call-preserved registers.
10825     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
10826     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
10827     assert(Mask && "Missing call preserved mask for calling convention");
10828     Ops.push_back(DAG.getRegisterMask(Mask));
10829   }
10830 
10831   // Glue the call to the argument copies, if any.
10832   if (Glue.getNode())
10833     Ops.push_back(Glue);
10834 
10835   // Emit the call.
10836   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
10837 
10838   if (IsTailCall) {
10839     MF.getFrameInfo().setHasTailCall();
10840     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
10841   }
10842 
10843   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
10844   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
10845   Glue = Chain.getValue(1);
10846 
10847   // Mark the end of the call, which is glued to the call itself.
10848   Chain = DAG.getCALLSEQ_END(Chain,
10849                              DAG.getConstant(NumBytes, DL, PtrVT, true),
10850                              DAG.getConstant(0, DL, PtrVT, true),
10851                              Glue, DL);
10852   Glue = Chain.getValue(1);
10853 
10854   // Assign locations to each value returned by this call.
10855   SmallVector<CCValAssign, 16> RVLocs;
10856   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
10857   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
10858 
10859   // Copy all of the result registers out of their specified physreg.
10860   for (auto &VA : RVLocs) {
10861     // Copy the value out
10862     SDValue RetValue =
10863         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
10864     // Glue the RetValue to the end of the call sequence
10865     Chain = RetValue.getValue(1);
10866     Glue = RetValue.getValue(2);
10867 
10868     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10869       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
10870       SDValue RetValue2 =
10871           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
10872       Chain = RetValue2.getValue(1);
10873       Glue = RetValue2.getValue(2);
10874       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
10875                              RetValue2);
10876     }
10877 
10878     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
10879 
10880     InVals.push_back(RetValue);
10881   }
10882 
10883   return Chain;
10884 }
10885 
10886 bool RISCVTargetLowering::CanLowerReturn(
10887     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
10888     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
10889   SmallVector<CCValAssign, 16> RVLocs;
10890   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
10891 
10892   Optional<unsigned> FirstMaskArgument;
10893   if (Subtarget.hasVInstructions())
10894     FirstMaskArgument = preAssignMask(Outs);
10895 
10896   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
10897     MVT VT = Outs[i].VT;
10898     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
10899     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
10900     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
10901                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
10902                  *this, FirstMaskArgument))
10903       return false;
10904   }
10905   return true;
10906 }
10907 
/// Lower outgoing return values into physical registers and emit the
/// target-specific return node. Handles the RV32D soft-float special case
/// where an f64 is returned split across a pair of i32 GPRs, and selects
/// the correct return opcode for interrupt handlers.
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr, CC_RISCV);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      // Split the f64 into two i32 halves; the low half goes in the
      // assigned register and the high half in the next GPR.
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      // Reserved registers (-ffixed-<reg>) cannot hold return values.
      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Glue the two copies together so they stay adjacent.
      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  unsigned RetOpc = RISCVISD::RET_FLAG;
  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
      MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    // Select the privilege-level-specific return: URET for "user", SRET
    // for "supervisor", MRET otherwise (machine mode).
    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;
  }

  return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
}
11007 
11008 void RISCVTargetLowering::validateCCReservedRegs(
11009     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
11010     MachineFunction &MF) const {
11011   const Function &F = MF.getFunction();
11012   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
11013 
11014   if (llvm::any_of(Regs, [&STI](auto Reg) {
11015         return STI.isRegisterReservedByUser(Reg.first);
11016       }))
11017     F.getContext().diagnose(DiagnosticInfoUnsupported{
11018         F, "Argument register required, but has been reserved."});
11019 }
11020 
/// A call marked `tail` in the IR is a candidate for tail-call emission; the
/// final eligibility check happens during call lowering
/// (isEligibleForTailCallOptimization).
bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}
11024 
/// Return the textual name of a RISCVISD target-specific node opcode (e.g.
/// for DAG dumps), or nullptr if \p Opcode is not a RISCVISD opcode.
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
// Expand each enumerator into a case returning its "RISCVISD::<name>" string.
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BR_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(MULHSU)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(CLZW)
  NODE_NAME_CASE(CTZW)
  NODE_NAME_CASE(FSLW)
  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FSL)
  NODE_NAME_CASE(FSR)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_X_SIGNEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(FCVT_X)
  NODE_NAME_CASE(FCVT_XU)
  NODE_NAME_CASE(FCVT_W_RV64)
  NODE_NAME_CASE(FCVT_WU_RV64)
  NODE_NAME_CASE(STRICT_FCVT_W_RV64)
  NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(GREV)
  NODE_NAME_CASE(GREVW)
  NODE_NAME_CASE(GORC)
  NODE_NAME_CASE(GORCW)
  NODE_NAME_CASE(SHFL)
  NODE_NAME_CASE(SHFLW)
  NODE_NAME_CASE(UNSHFL)
  NODE_NAME_CASE(UNSHFLW)
  NODE_NAME_CASE(BFP)
  NODE_NAME_CASE(BFPW)
  NODE_NAME_CASE(BCOMPRESS)
  NODE_NAME_CASE(BCOMPRESSW)
  NODE_NAME_CASE(BDECOMPRESS)
  NODE_NAME_CASE(BDECOMPRESSW)
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(VMV_S_X_VL)
  NODE_NAME_CASE(VFMV_S_F_VL)
  NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
  NODE_NAME_CASE(VSLIDEUP_VL)
  NODE_NAME_CASE(VSLIDE1UP_VL)
  NODE_NAME_CASE(VSLIDEDOWN_VL)
  NODE_NAME_CASE(VSLIDE1DOWN_VL)
  NODE_NAME_CASE(VID_VL)
  NODE_NAME_CASE(VFNCVT_ROD_VL)
  NODE_NAME_CASE(VECREDUCE_ADD_VL)
  NODE_NAME_CASE(VECREDUCE_UMAX_VL)
  NODE_NAME_CASE(VECREDUCE_SMAX_VL)
  NODE_NAME_CASE(VECREDUCE_UMIN_VL)
  NODE_NAME_CASE(VECREDUCE_SMIN_VL)
  NODE_NAME_CASE(VECREDUCE_AND_VL)
  NODE_NAME_CASE(VECREDUCE_OR_VL)
  NODE_NAME_CASE(VECREDUCE_XOR_VL)
  NODE_NAME_CASE(VECREDUCE_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_FMIN_VL)
  NODE_NAME_CASE(VECREDUCE_FMAX_VL)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(SADDSAT_VL)
  NODE_NAME_CASE(UADDSAT_VL)
  NODE_NAME_CASE(SSUBSAT_VL)
  NODE_NAME_CASE(USUBSAT_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FABS_VL)
  NODE_NAME_CASE(FSQRT_VL)
  NODE_NAME_CASE(FMA_VL)
  NODE_NAME_CASE(FCOPYSIGN_VL)
  NODE_NAME_CASE(SMIN_VL)
  NODE_NAME_CASE(SMAX_VL)
  NODE_NAME_CASE(UMIN_VL)
  NODE_NAME_CASE(UMAX_VL)
  NODE_NAME_CASE(FMINNUM_VL)
  NODE_NAME_CASE(FMAXNUM_VL)
  NODE_NAME_CASE(MULHS_VL)
  NODE_NAME_CASE(MULHU_VL)
  NODE_NAME_CASE(FP_TO_SINT_VL)
  NODE_NAME_CASE(FP_TO_UINT_VL)
  NODE_NAME_CASE(SINT_TO_FP_VL)
  NODE_NAME_CASE(UINT_TO_FP_VL)
  NODE_NAME_CASE(FP_EXTEND_VL)
  NODE_NAME_CASE(FP_ROUND_VL)
  NODE_NAME_CASE(VWMUL_VL)
  NODE_NAME_CASE(VWMULU_VL)
  NODE_NAME_CASE(VWMULSU_VL)
  NODE_NAME_CASE(VWADD_VL)
  NODE_NAME_CASE(VWADDU_VL)
  NODE_NAME_CASE(VWSUB_VL)
  NODE_NAME_CASE(VWSUBU_VL)
  NODE_NAME_CASE(VWADD_W_VL)
  NODE_NAME_CASE(VWADDU_W_VL)
  NODE_NAME_CASE(VWSUB_W_VL)
  NODE_NAME_CASE(VWSUBU_W_VL)
  NODE_NAME_CASE(SETCC_VL)
  NODE_NAME_CASE(VSELECT_VL)
  NODE_NAME_CASE(VP_MERGE_VL)
  NODE_NAME_CASE(VMAND_VL)
  NODE_NAME_CASE(VMOR_VL)
  NODE_NAME_CASE(VMXOR_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VRGATHER_VV_VL)
  NODE_NAME_CASE(VRGATHEREI16_VV_VL)
  NODE_NAME_CASE(VSEXT_VL)
  NODE_NAME_CASE(VZEXT_VL)
  NODE_NAME_CASE(VCPOP_VL)
  NODE_NAME_CASE(READ_CSR)
  NODE_NAME_CASE(WRITE_CSR)
  NODE_NAME_CASE(SWAP_CSR)
  }
  // clang-format on
  // Not a RISCVISD opcode (or FIRST_NUMBER); no name to report.
  return nullptr;
#undef NODE_NAME_CASE
}
11183 
11184 /// getConstraintType - Given a constraint letter, return the type of
11185 /// constraint it is for this target.
11186 RISCVTargetLowering::ConstraintType
11187 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
11188   if (Constraint.size() == 1) {
11189     switch (Constraint[0]) {
11190     default:
11191       break;
11192     case 'f':
11193       return C_RegisterClass;
11194     case 'I':
11195     case 'J':
11196     case 'K':
11197       return C_Immediate;
11198     case 'A':
11199       return C_Memory;
11200     case 'S': // A symbolic address
11201       return C_Other;
11202     }
11203   } else {
11204     if (Constraint == "vr" || Constraint == "vm")
11205       return C_RegisterClass;
11206   }
11207   return TargetLowering::getConstraintType(Constraint);
11208 }
11209 
11210 std::pair<unsigned, const TargetRegisterClass *>
11211 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
11212                                                   StringRef Constraint,
11213                                                   MVT VT) const {
11214   // First, see if this is a constraint that directly corresponds to a
11215   // RISCV register class.
11216   if (Constraint.size() == 1) {
11217     switch (Constraint[0]) {
11218     case 'r':
11219       // TODO: Support fixed vectors up to XLen for P extension?
11220       if (VT.isVector())
11221         break;
11222       return std::make_pair(0U, &RISCV::GPRRegClass);
11223     case 'f':
11224       if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
11225         return std::make_pair(0U, &RISCV::FPR16RegClass);
11226       if (Subtarget.hasStdExtF() && VT == MVT::f32)
11227         return std::make_pair(0U, &RISCV::FPR32RegClass);
11228       if (Subtarget.hasStdExtD() && VT == MVT::f64)
11229         return std::make_pair(0U, &RISCV::FPR64RegClass);
11230       break;
11231     default:
11232       break;
11233     }
11234   } else if (Constraint == "vr") {
11235     for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
11236                            &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
11237       if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
11238         return std::make_pair(0U, RC);
11239     }
11240   } else if (Constraint == "vm") {
11241     if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
11242       return std::make_pair(0U, &RISCV::VMV0RegClass);
11243   }
11244 
11245   // Clang will correctly decode the usage of register name aliases into their
11246   // official names. However, other frontends like `rustc` do not. This allows
11247   // users of these frontends to use the ABI names for registers in LLVM-style
11248   // register constraints.
11249   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
11250                                .Case("{zero}", RISCV::X0)
11251                                .Case("{ra}", RISCV::X1)
11252                                .Case("{sp}", RISCV::X2)
11253                                .Case("{gp}", RISCV::X3)
11254                                .Case("{tp}", RISCV::X4)
11255                                .Case("{t0}", RISCV::X5)
11256                                .Case("{t1}", RISCV::X6)
11257                                .Case("{t2}", RISCV::X7)
11258                                .Cases("{s0}", "{fp}", RISCV::X8)
11259                                .Case("{s1}", RISCV::X9)
11260                                .Case("{a0}", RISCV::X10)
11261                                .Case("{a1}", RISCV::X11)
11262                                .Case("{a2}", RISCV::X12)
11263                                .Case("{a3}", RISCV::X13)
11264                                .Case("{a4}", RISCV::X14)
11265                                .Case("{a5}", RISCV::X15)
11266                                .Case("{a6}", RISCV::X16)
11267                                .Case("{a7}", RISCV::X17)
11268                                .Case("{s2}", RISCV::X18)
11269                                .Case("{s3}", RISCV::X19)
11270                                .Case("{s4}", RISCV::X20)
11271                                .Case("{s5}", RISCV::X21)
11272                                .Case("{s6}", RISCV::X22)
11273                                .Case("{s7}", RISCV::X23)
11274                                .Case("{s8}", RISCV::X24)
11275                                .Case("{s9}", RISCV::X25)
11276                                .Case("{s10}", RISCV::X26)
11277                                .Case("{s11}", RISCV::X27)
11278                                .Case("{t3}", RISCV::X28)
11279                                .Case("{t4}", RISCV::X29)
11280                                .Case("{t5}", RISCV::X30)
11281                                .Case("{t6}", RISCV::X31)
11282                                .Default(RISCV::NoRegister);
11283   if (XRegFromAlias != RISCV::NoRegister)
11284     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
11285 
11286   // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
11287   // TableGen record rather than the AsmName to choose registers for InlineAsm
11288   // constraints, plus we want to match those names to the widest floating point
11289   // register type available, manually select floating point registers here.
11290   //
11291   // The second case is the ABI name of the register, so that frontends can also
11292   // use the ABI names in register constraint lists.
11293   if (Subtarget.hasStdExtF()) {
11294     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
11295                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
11296                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
11297                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
11298                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
11299                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
11300                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
11301                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
11302                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
11303                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
11304                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
11305                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
11306                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
11307                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
11308                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
11309                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
11310                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
11311                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
11312                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
11313                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
11314                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
11315                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
11316                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
11317                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
11318                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
11319                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
11320                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
11321                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
11322                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
11323                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
11324                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
11325                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
11326                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
11327                         .Default(RISCV::NoRegister);
11328     if (FReg != RISCV::NoRegister) {
11329       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
11330       if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
11331         unsigned RegNo = FReg - RISCV::F0_F;
11332         unsigned DReg = RISCV::F0_D + RegNo;
11333         return std::make_pair(DReg, &RISCV::FPR64RegClass);
11334       }
11335       if (VT == MVT::f32 || VT == MVT::Other)
11336         return std::make_pair(FReg, &RISCV::FPR32RegClass);
11337       if (Subtarget.hasStdExtZfh() && VT == MVT::f16) {
11338         unsigned RegNo = FReg - RISCV::F0_F;
11339         unsigned HReg = RISCV::F0_H + RegNo;
11340         return std::make_pair(HReg, &RISCV::FPR16RegClass);
11341       }
11342     }
11343   }
11344 
11345   if (Subtarget.hasVInstructions()) {
11346     Register VReg = StringSwitch<Register>(Constraint.lower())
11347                         .Case("{v0}", RISCV::V0)
11348                         .Case("{v1}", RISCV::V1)
11349                         .Case("{v2}", RISCV::V2)
11350                         .Case("{v3}", RISCV::V3)
11351                         .Case("{v4}", RISCV::V4)
11352                         .Case("{v5}", RISCV::V5)
11353                         .Case("{v6}", RISCV::V6)
11354                         .Case("{v7}", RISCV::V7)
11355                         .Case("{v8}", RISCV::V8)
11356                         .Case("{v9}", RISCV::V9)
11357                         .Case("{v10}", RISCV::V10)
11358                         .Case("{v11}", RISCV::V11)
11359                         .Case("{v12}", RISCV::V12)
11360                         .Case("{v13}", RISCV::V13)
11361                         .Case("{v14}", RISCV::V14)
11362                         .Case("{v15}", RISCV::V15)
11363                         .Case("{v16}", RISCV::V16)
11364                         .Case("{v17}", RISCV::V17)
11365                         .Case("{v18}", RISCV::V18)
11366                         .Case("{v19}", RISCV::V19)
11367                         .Case("{v20}", RISCV::V20)
11368                         .Case("{v21}", RISCV::V21)
11369                         .Case("{v22}", RISCV::V22)
11370                         .Case("{v23}", RISCV::V23)
11371                         .Case("{v24}", RISCV::V24)
11372                         .Case("{v25}", RISCV::V25)
11373                         .Case("{v26}", RISCV::V26)
11374                         .Case("{v27}", RISCV::V27)
11375                         .Case("{v28}", RISCV::V28)
11376                         .Case("{v29}", RISCV::V29)
11377                         .Case("{v30}", RISCV::V30)
11378                         .Case("{v31}", RISCV::V31)
11379                         .Default(RISCV::NoRegister);
11380     if (VReg != RISCV::NoRegister) {
11381       if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
11382         return std::make_pair(VReg, &RISCV::VMRegClass);
11383       if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
11384         return std::make_pair(VReg, &RISCV::VRRegClass);
11385       for (const auto *RC :
11386            {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
11387         if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
11388           VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
11389           return std::make_pair(VReg, RC);
11390         }
11391       }
11392     }
11393   }
11394 
11395   std::pair<Register, const TargetRegisterClass *> Res =
11396       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
11397 
11398   // If we picked one of the Zfinx register classes, remap it to the GPR class.
11399   // FIXME: When Zfinx is supported in CodeGen this will need to take the
11400   // Subtarget into account.
11401   if (Res.second == &RISCV::GPRF16RegClass ||
11402       Res.second == &RISCV::GPRF32RegClass ||
11403       Res.second == &RISCV::GPRF64RegClass)
11404     return std::make_pair(Res.first, &RISCV::GPRRegClass);
11405 
11406   return Res;
11407 }
11408 
11409 unsigned
11410 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
11411   // Currently only support length 1 constraints.
11412   if (ConstraintCode.size() == 1) {
11413     switch (ConstraintCode[0]) {
11414     case 'A':
11415       return InlineAsm::Constraint_A;
11416     default:
11417       break;
11418     }
11419   }
11420 
11421   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
11422 }
11423 
11424 void RISCVTargetLowering::LowerAsmOperandForConstraint(
11425     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
11426     SelectionDAG &DAG) const {
11427   // Currently only support length 1 constraints.
11428   if (Constraint.length() == 1) {
11429     switch (Constraint[0]) {
11430     case 'I':
11431       // Validate & create a 12-bit signed immediate operand.
11432       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
11433         uint64_t CVal = C->getSExtValue();
11434         if (isInt<12>(CVal))
11435           Ops.push_back(
11436               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
11437       }
11438       return;
11439     case 'J':
11440       // Validate & create an integer zero operand.
11441       if (auto *C = dyn_cast<ConstantSDNode>(Op))
11442         if (C->getZExtValue() == 0)
11443           Ops.push_back(
11444               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
11445       return;
11446     case 'K':
11447       // Validate & create a 5-bit unsigned immediate operand.
11448       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
11449         uint64_t CVal = C->getZExtValue();
11450         if (isUInt<5>(CVal))
11451           Ops.push_back(
11452               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
11453       }
11454       return;
11455     case 'S':
11456       if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
11457         Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
11458                                                  GA->getValueType(0)));
11459       } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
11460         Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
11461                                                 BA->getValueType(0)));
11462       }
11463       return;
11464     default:
11465       break;
11466     }
11467   }
11468   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
11469 }
11470 
11471 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
11472                                                    Instruction *Inst,
11473                                                    AtomicOrdering Ord) const {
11474   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
11475     return Builder.CreateFence(Ord);
11476   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
11477     return Builder.CreateFence(AtomicOrdering::Release);
11478   return nullptr;
11479 }
11480 
11481 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
11482                                                     Instruction *Inst,
11483                                                     AtomicOrdering Ord) const {
11484   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
11485     return Builder.CreateFence(AtomicOrdering::Acquire);
11486   return nullptr;
11487 }
11488 
11489 TargetLowering::AtomicExpansionKind
11490 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
11491   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
11492   // point operations can't be used in an lr/sc sequence without breaking the
11493   // forward-progress guarantee.
11494   if (AI->isFloatingPointOperation())
11495     return AtomicExpansionKind::CmpXChg;
11496 
11497   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
11498   if (Size == 8 || Size == 16)
11499     return AtomicExpansionKind::MaskedIntrinsic;
11500   return AtomicExpansionKind::None;
11501 }
11502 
11503 static Intrinsic::ID
11504 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
11505   if (XLen == 32) {
11506     switch (BinOp) {
11507     default:
11508       llvm_unreachable("Unexpected AtomicRMW BinOp");
11509     case AtomicRMWInst::Xchg:
11510       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
11511     case AtomicRMWInst::Add:
11512       return Intrinsic::riscv_masked_atomicrmw_add_i32;
11513     case AtomicRMWInst::Sub:
11514       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
11515     case AtomicRMWInst::Nand:
11516       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
11517     case AtomicRMWInst::Max:
11518       return Intrinsic::riscv_masked_atomicrmw_max_i32;
11519     case AtomicRMWInst::Min:
11520       return Intrinsic::riscv_masked_atomicrmw_min_i32;
11521     case AtomicRMWInst::UMax:
11522       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
11523     case AtomicRMWInst::UMin:
11524       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
11525     }
11526   }
11527 
11528   if (XLen == 64) {
11529     switch (BinOp) {
11530     default:
11531       llvm_unreachable("Unexpected AtomicRMW BinOp");
11532     case AtomicRMWInst::Xchg:
11533       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
11534     case AtomicRMWInst::Add:
11535       return Intrinsic::riscv_masked_atomicrmw_add_i64;
11536     case AtomicRMWInst::Sub:
11537       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
11538     case AtomicRMWInst::Nand:
11539       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
11540     case AtomicRMWInst::Max:
11541       return Intrinsic::riscv_masked_atomicrmw_max_i64;
11542     case AtomicRMWInst::Min:
11543       return Intrinsic::riscv_masked_atomicrmw_min_i64;
11544     case AtomicRMWInst::UMax:
11545       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
11546     case AtomicRMWInst::UMin:
11547       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
11548     }
11549   }
11550 
11551   llvm_unreachable("Unexpected XLen\n");
11552 }
11553 
/// Emit a call to the riscv.masked.atomicrmw.* intrinsic implementing a
/// sub-word atomicrmw on the aligned word that contains it.
///
/// \p AlignedAddr  word-aligned address of the containing word.
/// \p Incr         the operand value (presumably already shifted into
///                 position by AtomicExpandPass -- see ShiftAmt below).
/// \p Mask         mask selecting the sub-word bits within the word.
/// \p ShiftAmt     number of bits the value is shifted left within the word.
/// \returns the intrinsic's result, truncated back to i32 on RV64.
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  // The ordering is passed to the intrinsic as an XLen-wide integer constant.
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  // The i64 variants of the intrinsics take their i32 operands sign-extended
  // to i64.
  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    // SextShamt = XLen - ValWidth - ShiftAmt, computed as
    // (XLen - ValWidth) - ShiftAmt.
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  // The intrinsic returns an XLen-wide value; narrow back to i32 on RV64.
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
11596 
11597 TargetLowering::AtomicExpansionKind
11598 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
11599     AtomicCmpXchgInst *CI) const {
11600   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
11601   if (Size == 8 || Size == 16)
11602     return AtomicExpansionKind::MaskedIntrinsic;
11603   return AtomicExpansionKind::None;
11604 }
11605 
11606 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
11607     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
11608     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
11609   unsigned XLen = Subtarget.getXLen();
11610   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
11611   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
11612   if (XLen == 64) {
11613     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
11614     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
11615     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
11616     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
11617   }
11618   Type *Tys[] = {AlignedAddr->getType()};
11619   Function *MaskedCmpXchg =
11620       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
11621   Value *Result = Builder.CreateCall(
11622       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
11623   if (XLen == 64)
11624     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11625   return Result;
11626 }
11627 
// Conservatively keep any sign/zero extend of a gather/scatter index: this
// target never removes it.
bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
                                                        EVT DataVT) const {
  return false;
}
11632 
11633 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
11634                                                EVT VT) const {
11635   if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
11636     return false;
11637 
11638   switch (FPVT.getSimpleVT().SimpleTy) {
11639   case MVT::f16:
11640     return Subtarget.hasStdExtZfh();
11641   case MVT::f32:
11642     return Subtarget.hasStdExtF();
11643   case MVT::f64:
11644     return Subtarget.hasStdExtD();
11645   default:
11646     return false;
11647   }
11648 }
11649 
11650 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
11651   // If we are using the small code model, we can reduce size of jump table
11652   // entry to 4 bytes.
11653   if (Subtarget.is64Bit() && !isPositionIndependent() &&
11654       getTargetMachine().getCodeModel() == CodeModel::Small) {
11655     return MachineJumpTableInfo::EK_Custom32;
11656   }
11657   return TargetLowering::getJumpTableEncoding();
11658 }
11659 
// Emit a custom (EK_Custom32) jump table entry: a plain reference to the
// target block's symbol. Only valid under the conditions asserted below
// (RV64, non-PIC, small code model), where the absolute address fits in the
// 4-byte entry.
const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
    const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
    unsigned uid, MCContext &Ctx) const {
  assert(Subtarget.is64Bit() && !isPositionIndependent() &&
         getTargetMachine().getCodeModel() == CodeModel::Small);
  return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
}
11667 
11668 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
11669                                                      EVT VT) const {
11670   VT = VT.getScalarType();
11671 
11672   if (!VT.isSimple())
11673     return false;
11674 
11675   switch (VT.getSimpleVT().SimpleTy) {
11676   case MVT::f16:
11677     return Subtarget.hasStdExtZfh();
11678   case MVT::f32:
11679     return Subtarget.hasStdExtF();
11680   case MVT::f64:
11681     return Subtarget.hasStdExtD();
11682   default:
11683     break;
11684   }
11685 
11686   return false;
11687 }
11688 
// Register used to pass the exception object to the landing pad: x10 (the
// ABI register a0).
Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}
11693 
// Register used to pass the exception type-id selector to the landing pad:
// x11 (the ABI register a1).
Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}
11698 
11699 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
11700   // Return false to suppress the unnecessary extensions if the LibCall
11701   // arguments or return value is f32 type for LP64 ABI.
11702   RISCVABI::ABI ABI = Subtarget.getTargetABI();
11703   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
11704     return false;
11705 
11706   return true;
11707 }
11708 
11709 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
11710   if (Subtarget.is64Bit() && Type == MVT::i32)
11711     return true;
11712 
11713   return IsSigned;
11714 }
11715 
11716 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
11717                                                  SDValue C) const {
11718   // Check integral scalar types.
11719   if (VT.isScalarInteger()) {
11720     // Omit the optimization if the sub target has the M extension and the data
11721     // size exceeds XLen.
11722     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
11723       return false;
11724     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
11725       // Break the MUL to a SLLI and an ADD/SUB.
11726       const APInt &Imm = ConstNode->getAPIntValue();
11727       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
11728           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
11729         return true;
11730       // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
11731       if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
11732           ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
11733            (Imm - 8).isPowerOf2()))
11734         return true;
11735       // Omit the following optimization if the sub target has the M extension
11736       // and the data size >= XLen.
11737       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
11738         return false;
11739       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
11740       // a pair of LUI/ADDI.
11741       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
11742         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
11743         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
11744             (1 - ImmS).isPowerOf2())
11745         return true;
11746       }
11747     }
11748   }
11749 
11750   return false;
11751 }
11752 
11753 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
11754                                                       SDValue ConstNode) const {
11755   // Let the DAGCombiner decide for vectors.
11756   EVT VT = AddNode.getValueType();
11757   if (VT.isVector())
11758     return true;
11759 
11760   // Let the DAGCombiner decide for larger types.
11761   if (VT.getScalarSizeInBits() > Subtarget.getXLen())
11762     return true;
11763 
11764   // It is worse if c1 is simm12 while c1*c2 is not.
11765   ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
11766   ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
11767   const APInt &C1 = C1Node->getAPIntValue();
11768   const APInt &C2 = C2Node->getAPIntValue();
11769   if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
11770     return false;
11771 
11772   // Default to true and let the DAGCombiner decide.
11773   return true;
11774 }
11775 
11776 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
11777     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
11778     bool *Fast) const {
11779   if (!VT.isVector()) {
11780     if (Fast)
11781       *Fast = false;
11782     return Subtarget.enableUnalignedScalarMem();
11783   }
11784 
11785   // All vector implementations must support element alignment
11786   EVT ElemVT = VT.getVectorElementType();
11787   if (Alignment >= ElemVT.getStoreSize()) {
11788     if (Fast)
11789       *Fast = true;
11790     return true;
11791   }
11792 
11793   return false;
11794 }
11795 
/// Target hook for splitting \p Val into \p NumParts registers of type
/// \p PartVT. Handles two RISC-V-specific cases: (1) NaN-boxing an f16 into
/// an f32 argument register for the ABI, and (2) placing a scalable vector
/// value into a wider scalable-vector register. Returns false to fall back
/// to the generic splitting logic. Note: both handled cases write only
/// Parts[0] (a single register part).
bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
  // A present CC indicates this copy is for ABI argument/return passing.
  bool IsABIRegCopy = CC.hasValue();
  EVT ValueVT = Val.getValueType();
  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
    // Cast the f16 to i16, extend to i32, pad with ones to make a float nan,
    // and cast to f32.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
                      DAG.getConstant(0xFFFF0000, DL, MVT::i32));
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
    Parts[0] = Val;
    return true;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    // Sizes are minimum (vscale=1) bit counts; the ratio between them is
    // what matters for scalable types.
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    // Only handle the case where the part is a whole multiple of the value.
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      // If the element types are different, bitcast to the same element type of
      // PartVT first.
      // Give an example here, we want copy a <vscale x 1 x i8> value to
      // <vscale x 4 x i16>.
      // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
      // subvector, then we can bitcast to <vscale x 4 x i16>.
      if (ValueEltVT != PartEltVT) {
        if (PartVTBitSize > ValueVTBitSize) {
          unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
          assert(Count != 0 && "The number of element should not be zero.");
          EVT SameEltTypeVT =
              EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
          Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
                            DAG.getUNDEF(SameEltTypeVT), Val,
                            DAG.getVectorIdxConstant(0, DL));
        }
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
      } else {
        // Same element type: a plain insert into the low part of an undef
        // part-sized vector suffices.
        Val =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getVectorIdxConstant(0, DL));
      }
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}
11849 
/// Inverse of splitValueIntoRegisterParts: reassemble a value of type
/// \p ValueVT from register parts of type \p PartVT. Handles un-NaN-boxing
/// an f16 from an f32 register and extracting a scalable vector from a wider
/// scalable-vector register. Returns an empty SDValue to fall back to the
/// generic logic. Note: both handled cases read only Parts[0].
SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
  // A present CC indicates this copy is for ABI argument/return passing.
  bool IsABIRegCopy = CC.hasValue();
  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to f16.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    // Sizes are minimum (vscale=1) bit counts; the ratio between them is
    // what matters for scalable types.
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    // Only handle the case where the part is a whole multiple of the value.
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert it to the same element type
      // of PartVT.
      // Give an example here, we want copy a <vscale x 1 x i8> value from
      // <vscale x 4 x i16>.
      // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
      // then we can extract <vscale x 1 x i8>.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of element should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      // Extract the value from the low elements of the (possibly bitcast)
      // part register.
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      return Val;
    }
  }
  return SDValue();
}
11894 
/// Custom (sdiv X, pow2) lowering that exploits Zbt's conditional move:
///   (sdiv X, pow2) -> (sra (select (X < 0), (add X, pow2-1), X), log2(pow2))
/// negating the result when the divisor is negative. New nodes are recorded
/// in \p Created for the DAG combiner's bookkeeping.
SDValue
RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                   SelectionDAG &DAG,
                                   SmallVectorImpl<SDNode *> &Created) const {
  // If division is cheap (e.g. optimizing for size), keep the plain SDIV.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV

  assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
         "Unexpected divisor!");

  // Conditional move is needed, so do the transformation iff Zbt is enabled.
  if (!Subtarget.hasStdExtZbt())
    return SDValue();

  // When |Divisor| >= 2 ^ 12, it isn't profitable to do such transformation.
  // Besides, more critical path instructions will be generated when dividing
  // by 2. So we keep using the original DAGs for these cases.
  unsigned Lg2 = Divisor.countTrailingZeros();
  if (Lg2 == 1 || Lg2 >= 12)
    return SDValue();

  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);

  // Add (N0 < 0) ? Pow2 - 1 : 0; rounds the negative case toward zero
  // before the arithmetic shift.
  SDValue Cmp = DAG.getSetCC(DL, VT, N0, Zero, ISD::SETLT);
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
  SDValue Sel = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);

  Created.push_back(Cmp.getNode());
  Created.push_back(Add.getNode());
  Created.push_back(Sel.getNode());

  // Divide by pow2.
  SDValue SRA =
      DAG.getNode(ISD::SRA, DL, VT, Sel, DAG.getConstant(Lg2, DL, VT));

  // If we're dividing by a positive value, we're done.  Otherwise, we must
  // negate the result.
  if (Divisor.isNonNegative())
    return SRA;

  Created.push_back(SRA.getNode());
  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
11948 
11949 #define GET_REGISTER_MATCHER
11950 #include "RISCVGenAsmMatcher.inc"
11951 
/// Resolve a register name (as used by llvm.read_register /
/// llvm.write_register and `register ... asm(...)` globals) to a physical
/// register. The register must be reserved -- globally or by the user --
/// otherwise this is a fatal error.
Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  // Try the alternate-name table first, then the primary names; both
  // matchers come from the generated RISCVGenAsmMatcher.inc.
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  // Only registers removed from the allocator (reserved) may be accessed by
  // name; anything else could be clobbered by register allocation.
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}
11967 
11968 namespace llvm {
11969 namespace RISCVVIntrinsicsTable {
11970 
11971 #define GET_RISCVVIntrinsicsTable_IMPL
11972 #include "RISCVGenSearchableTables.inc"
11973 
11974 } // namespace RISCVVIntrinsicsTable
11975 
11976 } // namespace llvm
11977