1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/IR/IntrinsicsRISCV.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "riscv-lower"
42 
43 STATISTIC(NumTailCalls, "Number of tail calls");
44 
45 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
46                                          const RISCVSubtarget &STI)
47     : TargetLowering(TM), Subtarget(STI) {
48 
49   if (Subtarget.isRV32E())
50     report_fatal_error("Codegen not yet implemented for RV32E");
51 
52   RISCVABI::ABI ABI = Subtarget.getTargetABI();
53   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
54 
55   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
56       !Subtarget.hasStdExtF()) {
57     errs() << "Hard-float 'f' ABI can't be used for a target that "
58               "doesn't support the F instruction set extension (ignoring "
59               "target-abi)\n";
60     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
61   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
62              !Subtarget.hasStdExtD()) {
63     errs() << "Hard-float 'd' ABI can't be used for a target that "
64               "doesn't support the D instruction set extension (ignoring "
65               "target-abi)\n";
66     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
67   }
68 
69   switch (ABI) {
70   default:
71     report_fatal_error("Don't know how to lower this ABI");
72   case RISCVABI::ABI_ILP32:
73   case RISCVABI::ABI_ILP32F:
74   case RISCVABI::ABI_ILP32D:
75   case RISCVABI::ABI_LP64:
76   case RISCVABI::ABI_LP64F:
77   case RISCVABI::ABI_LP64D:
78     break;
79   }
80 
81   MVT XLenVT = Subtarget.getXLenVT();
82 
83   // Set up the register classes.
84   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
85 
86   if (Subtarget.hasStdExtZfh())
87     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
88   if (Subtarget.hasStdExtF())
89     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
90   if (Subtarget.hasStdExtD())
91     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
92 
93   static const MVT::SimpleValueType BoolVecVTs[] = {
94       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
95       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
96   static const MVT::SimpleValueType IntVecVTs[] = {
97       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
98       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
99       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
100       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
101       MVT::nxv4i64, MVT::nxv8i64};
102   static const MVT::SimpleValueType F16VecVTs[] = {
103       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
104       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
105   static const MVT::SimpleValueType F32VecVTs[] = {
106       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
107   static const MVT::SimpleValueType F64VecVTs[] = {
108       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
109 
110   if (Subtarget.hasStdExtV()) {
111     auto addRegClassForRVV = [this](MVT VT) {
112       unsigned Size = VT.getSizeInBits().getKnownMinValue();
113       assert(Size <= 512 && isPowerOf2_32(Size));
114       const TargetRegisterClass *RC;
115       if (Size <= 64)
116         RC = &RISCV::VRRegClass;
117       else if (Size == 128)
118         RC = &RISCV::VRM2RegClass;
119       else if (Size == 256)
120         RC = &RISCV::VRM4RegClass;
121       else
122         RC = &RISCV::VRM8RegClass;
123 
124       addRegisterClass(VT, RC);
125     };
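    // For example, the lambda above places nxv8i8 (64-bit known minimum size)
    // in VR, nxv16i8 in VRM2, nxv32i8 in VRM4, and nxv64i8 in VRM8.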
126 
127     for (MVT VT : BoolVecVTs)
128       addRegClassForRVV(VT);
129     for (MVT VT : IntVecVTs)
130       addRegClassForRVV(VT);
131 
132     if (Subtarget.hasStdExtZfh())
133       for (MVT VT : F16VecVTs)
134         addRegClassForRVV(VT);
135 
136     if (Subtarget.hasStdExtF())
137       for (MVT VT : F32VecVTs)
138         addRegClassForRVV(VT);
139 
140     if (Subtarget.hasStdExtD())
141       for (MVT VT : F64VecVTs)
142         addRegClassForRVV(VT);
143 
144     if (Subtarget.useRVVForFixedLengthVectors()) {
145       auto addRegClassForFixedVectors = [this](MVT VT) {
146         unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
147         const TargetRegisterClass *RC;
148         if (LMul == 1)
149           RC = &RISCV::VRRegClass;
150         else if (LMul == 2)
151           RC = &RISCV::VRM2RegClass;
152         else if (LMul == 4)
153           RC = &RISCV::VRM4RegClass;
154         else if (LMul == 8)
155           RC = &RISCV::VRM8RegClass;
156         else
157           llvm_unreachable("Unexpected LMul!");
158 
159         addRegisterClass(VT, RC);
160       };
161       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
162         if (useRVVForFixedLengthVectorVT(VT))
163           addRegClassForFixedVectors(VT);
164 
165       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
166         if (useRVVForFixedLengthVectorVT(VT))
167           addRegClassForFixedVectors(VT);
168     }
169   }
170 
171   // Compute derived properties from the register classes.
172   computeRegisterProperties(STI.getRegisterInfo());
173 
174   setStackPointerRegisterToSaveRestore(RISCV::X2);
175 
176   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
177     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
178 
179   // TODO: add all necessary setOperationAction calls.
180   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
181 
182   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
183   setOperationAction(ISD::BR_CC, XLenVT, Expand);
184   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
185 
186   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
187   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
188 
189   setOperationAction(ISD::VASTART, MVT::Other, Custom);
190   setOperationAction(ISD::VAARG, MVT::Other, Expand);
191   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
192   setOperationAction(ISD::VAEND, MVT::Other, Expand);
193 
194   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
195   if (!Subtarget.hasStdExtZbb()) {
196     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
197     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
198   }
199 
200   if (Subtarget.is64Bit()) {
201     setOperationAction(ISD::ADD, MVT::i32, Custom);
202     setOperationAction(ISD::SUB, MVT::i32, Custom);
203     setOperationAction(ISD::SHL, MVT::i32, Custom);
204     setOperationAction(ISD::SRA, MVT::i32, Custom);
205     setOperationAction(ISD::SRL, MVT::i32, Custom);
206   }
207 
208   if (!Subtarget.hasStdExtM()) {
209     setOperationAction(ISD::MUL, XLenVT, Expand);
210     setOperationAction(ISD::MULHS, XLenVT, Expand);
211     setOperationAction(ISD::MULHU, XLenVT, Expand);
212     setOperationAction(ISD::SDIV, XLenVT, Expand);
213     setOperationAction(ISD::UDIV, XLenVT, Expand);
214     setOperationAction(ISD::SREM, XLenVT, Expand);
215     setOperationAction(ISD::UREM, XLenVT, Expand);
216   }
217 
218   if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
219     setOperationAction(ISD::MUL, MVT::i32, Custom);
220 
221     setOperationAction(ISD::SDIV, MVT::i8, Custom);
222     setOperationAction(ISD::UDIV, MVT::i8, Custom);
223     setOperationAction(ISD::UREM, MVT::i8, Custom);
224     setOperationAction(ISD::SDIV, MVT::i16, Custom);
225     setOperationAction(ISD::UDIV, MVT::i16, Custom);
226     setOperationAction(ISD::UREM, MVT::i16, Custom);
227     setOperationAction(ISD::SDIV, MVT::i32, Custom);
228     setOperationAction(ISD::UDIV, MVT::i32, Custom);
229     setOperationAction(ISD::UREM, MVT::i32, Custom);
230   }
231 
232   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
233   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
234   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
235   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
236 
237   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
238   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
239   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
240 
241   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
242     if (Subtarget.is64Bit()) {
243       setOperationAction(ISD::ROTL, MVT::i32, Custom);
244       setOperationAction(ISD::ROTR, MVT::i32, Custom);
245     }
246   } else {
247     setOperationAction(ISD::ROTL, XLenVT, Expand);
248     setOperationAction(ISD::ROTR, XLenVT, Expand);
249   }
250 
251   if (Subtarget.hasStdExtZbp()) {
252     // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
253     // more combining.
254     setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
255     setOperationAction(ISD::BSWAP, XLenVT, Custom);
256 
257     if (Subtarget.is64Bit()) {
258       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
259       setOperationAction(ISD::BSWAP, MVT::i32, Custom);
260     }
261   } else {
262     // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
263     // pattern match it directly in isel.
264     setOperationAction(ISD::BSWAP, XLenVT,
265                        Subtarget.hasStdExtZbb() ? Legal : Expand);
266   }
267 
268   if (Subtarget.hasStdExtZbb()) {
269     setOperationAction(ISD::SMIN, XLenVT, Legal);
270     setOperationAction(ISD::SMAX, XLenVT, Legal);
271     setOperationAction(ISD::UMIN, XLenVT, Legal);
272     setOperationAction(ISD::UMAX, XLenVT, Legal);
273   } else {
274     setOperationAction(ISD::CTTZ, XLenVT, Expand);
275     setOperationAction(ISD::CTLZ, XLenVT, Expand);
276     setOperationAction(ISD::CTPOP, XLenVT, Expand);
277   }
278 
279   if (Subtarget.hasStdExtZbt()) {
280     setOperationAction(ISD::FSHL, XLenVT, Custom);
281     setOperationAction(ISD::FSHR, XLenVT, Custom);
282     setOperationAction(ISD::SELECT, XLenVT, Legal);
283 
284     if (Subtarget.is64Bit()) {
285       setOperationAction(ISD::FSHL, MVT::i32, Custom);
286       setOperationAction(ISD::FSHR, MVT::i32, Custom);
287     }
288   } else {
289     setOperationAction(ISD::SELECT, XLenVT, Custom);
290   }
291 
292   ISD::CondCode FPCCToExpand[] = {
293       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
294       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
295       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
296 
297   ISD::NodeType FPOpToExpand[] = {
298       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
299       ISD::FP_TO_FP16};
300 
301   if (Subtarget.hasStdExtZfh()) {
302     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
303 
305     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
306     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
307     for (auto CC : FPCCToExpand)
308       setCondCodeAction(CC, MVT::f16, Expand);
309     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
310     setOperationAction(ISD::SELECT, MVT::f16, Custom);
311     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
312     for (auto Op : FPOpToExpand)
313       setOperationAction(Op, MVT::f16, Expand);
314   }
315 
316   if (Subtarget.hasStdExtF()) {
317     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
318     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
319     for (auto CC : FPCCToExpand)
320       setCondCodeAction(CC, MVT::f32, Expand);
321     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
322     setOperationAction(ISD::SELECT, MVT::f32, Custom);
323     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
324     for (auto Op : FPOpToExpand)
325       setOperationAction(Op, MVT::f32, Expand);
326     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
327     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
328   }
329 
330   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
331     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
332 
333   if (Subtarget.hasStdExtD()) {
334     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
335     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
336     for (auto CC : FPCCToExpand)
337       setCondCodeAction(CC, MVT::f64, Expand);
338     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
339     setOperationAction(ISD::SELECT, MVT::f64, Custom);
340     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
341     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
342     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
343     for (auto Op : FPOpToExpand)
344       setOperationAction(Op, MVT::f64, Expand);
345     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
346     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
347   }
348 
349   if (Subtarget.is64Bit()) {
350     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
351     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
352     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
353     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
354   }
355 
356   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
357   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
358   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
359   setOperationAction(ISD::JumpTable, XLenVT, Custom);
360 
361   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
362 
363   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
364   // Unfortunately this can't be determined just from the ISA naming string.
365   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
366                      Subtarget.is64Bit() ? Legal : Custom);
367 
368   setOperationAction(ISD::TRAP, MVT::Other, Legal);
369   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
370   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
371 
372   if (Subtarget.hasStdExtA()) {
373     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
374     setMinCmpXchgSizeInBits(32);
375   } else {
376     setMaxAtomicSizeInBitsSupported(0);
377   }
378 
379   setBooleanContents(ZeroOrOneBooleanContent);
380 
381   if (Subtarget.hasStdExtV()) {
382     setBooleanVectorContents(ZeroOrOneBooleanContent);
383 
384     setOperationAction(ISD::VSCALE, XLenVT, Custom);
385 
386     // RVV intrinsics may have illegal operands.
387     // We also need to custom legalize vmv.x.s.
388     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
389     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
390     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
391     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
392     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
393     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
394 
395     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
396 
397     if (Subtarget.is64Bit()) {
398       setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
399       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
400     } else {
401       // We must custom-lower certain vXi64 operations on RV32 due to the vector
402       // element type being illegal.
403       setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
404       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
405       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
406 
407       setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
408       setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
409       setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
410       setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
411       setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
412       setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
413       setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
414       setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
415     }
416 
417     for (MVT VT : BoolVecVTs) {
418       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
419 
420       // Mask VTs are custom-expanded into a series of standard nodes
421       setOperationAction(ISD::TRUNCATE, VT, Custom);
422     }
423 
424     for (MVT VT : IntVecVTs) {
425       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
426 
427       setOperationAction(ISD::SMIN, VT, Legal);
428       setOperationAction(ISD::SMAX, VT, Legal);
429       setOperationAction(ISD::UMIN, VT, Legal);
430       setOperationAction(ISD::UMAX, VT, Legal);
431 
432       setOperationAction(ISD::ROTL, VT, Expand);
433       setOperationAction(ISD::ROTR, VT, Expand);
434 
435       // Custom-lower extensions and truncations from/to mask types.
436       setOperationAction(ISD::ANY_EXTEND, VT, Custom);
437       setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
438       setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
439 
440       // RVV has native int->float & float->int conversions where the
441       // element type sizes are within one power-of-two of each other. Any
442       // wider distances between type sizes have to be lowered as sequences
443       // which progressively narrow the gap in stages.
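      // For example, an nxv2i8 -> nxv2f64 conversion (8 -> 64 bits) is lowered
      // as an i8 -> i64 extend followed by a native i64 -> f64 conversion.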
444       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
445       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
446       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
447       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
448 
449       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR"
450       // nodes which truncate by one power of two at a time.
451       setOperationAction(ISD::TRUNCATE, VT, Custom);
452 
453       // Custom-lower insert/extract operations to simplify patterns.
454       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
455       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
456 
457       // Custom-lower reduction operations to set up the corresponding custom
458       // nodes' operands.
459       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
460       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
461       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
462       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
463       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
464       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
465       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
466       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
467     }
468 
469     // Expand various CCs to best match the RVV ISA, which natively supports UNE
470     // but no other unordered comparisons, and supports all ordered comparisons
471     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
472     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
473     // and we pattern-match those back to the "original", swapping operands once
474     // more. This way we catch both operations and both "vf" and "fv" forms with
475     // fewer patterns.
476     ISD::CondCode VFPCCToExpand[] = {
477         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
478         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
479         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
480     };
481 
482     // Sets common operation actions on RVV floating-point vector types.
483     const auto SetCommonVFPActions = [&](MVT VT) {
484       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
485       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
486       // sizes are within one power-of-two of each other. Therefore conversions
487       // between vXf16 and vXf64 must be lowered as sequences which convert via
488       // vXf32.
489       setOperationAction(ISD::FP_ROUND, VT, Custom);
490       setOperationAction(ISD::FP_EXTEND, VT, Custom);
491       // Custom-lower insert/extract operations to simplify patterns.
492       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
493       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
494       // Expand various condition codes (explained above).
495       for (auto CC : VFPCCToExpand)
496         setCondCodeAction(CC, VT, Expand);
497 
498       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
499       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
500     };
501 
502     if (Subtarget.hasStdExtZfh())
503       for (MVT VT : F16VecVTs)
504         SetCommonVFPActions(VT);
505 
506     if (Subtarget.hasStdExtF())
507       for (MVT VT : F32VecVTs)
508         SetCommonVFPActions(VT);
509 
510     if (Subtarget.hasStdExtD())
511       for (MVT VT : F64VecVTs)
512         SetCommonVFPActions(VT);
513 
514     if (Subtarget.useRVVForFixedLengthVectors()) {
515       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
516         if (!useRVVForFixedLengthVectorVT(VT))
517           continue;
518 
519         // By default everything must be expanded.
520         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
521           setOperationAction(Op, VT, Expand);
522 
523         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
524         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
525 
526         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
527 
528         setOperationAction(ISD::LOAD, VT, Custom);
529         setOperationAction(ISD::STORE, VT, Custom);
530         setOperationAction(ISD::ADD, VT, Custom);
531         setOperationAction(ISD::MUL, VT, Custom);
532         setOperationAction(ISD::SUB, VT, Custom);
533         setOperationAction(ISD::AND, VT, Custom);
534         setOperationAction(ISD::OR, VT, Custom);
535         setOperationAction(ISD::XOR, VT, Custom);
536         setOperationAction(ISD::SDIV, VT, Custom);
537         setOperationAction(ISD::SREM, VT, Custom);
538         setOperationAction(ISD::UDIV, VT, Custom);
539         setOperationAction(ISD::UREM, VT, Custom);
540         setOperationAction(ISD::SHL, VT, Custom);
541         setOperationAction(ISD::SRA, VT, Custom);
542         setOperationAction(ISD::SRL, VT, Custom);
543       }
544 
545       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
546         if (!useRVVForFixedLengthVectorVT(VT))
547           continue;
548 
549         // By default everything must be expanded.
550         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
551           setOperationAction(Op, VT, Expand);
552 
553         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
554         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
555 
556         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
557 
558         setOperationAction(ISD::LOAD, VT, Custom);
559         setOperationAction(ISD::STORE, VT, Custom);
560         setOperationAction(ISD::FADD, VT, Custom);
561         setOperationAction(ISD::FSUB, VT, Custom);
562         setOperationAction(ISD::FMUL, VT, Custom);
563         setOperationAction(ISD::FDIV, VT, Custom);
564         setOperationAction(ISD::FNEG, VT, Custom);
565         setOperationAction(ISD::FMA, VT, Custom);
566       }
567     }
568   }
569 
570   // Function alignments.
571   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
572   setMinFunctionAlignment(FunctionAlignment);
573   setPrefFunctionAlignment(FunctionAlignment);
574 
575   setMinimumJumpTableEntries(5);
576 
577   // Jumps are expensive compared to logic ops.
578   setJumpIsExpensive();
579 
580   // We can use any register for comparisons.
581   setHasMultipleConditionRegisters();
582 
583   setTargetDAGCombine(ISD::SETCC);
584   if (Subtarget.hasStdExtZbp()) {
585     setTargetDAGCombine(ISD::OR);
586   }
587 }
588 
589 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
590                                             LLVMContext &Context,
591                                             EVT VT) const {
592   if (!VT.isVector())
593     return getPointerTy(DL);
594   if (Subtarget.hasStdExtV() && VT.isScalableVector())
595     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
596   return VT.changeVectorElementTypeToInteger();
597 }
598 
599 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
600                                              const CallInst &I,
601                                              MachineFunction &MF,
602                                              unsigned Intrinsic) const {
603   switch (Intrinsic) {
604   default:
605     return false;
606   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
607   case Intrinsic::riscv_masked_atomicrmw_add_i32:
608   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
609   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
610   case Intrinsic::riscv_masked_atomicrmw_max_i32:
611   case Intrinsic::riscv_masked_atomicrmw_min_i32:
612   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
613   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
614   case Intrinsic::riscv_masked_cmpxchg_i32:
615     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
616     Info.opc = ISD::INTRINSIC_W_CHAIN;
617     Info.memVT = MVT::getVT(PtrTy->getElementType());
618     Info.ptrVal = I.getArgOperand(0);
619     Info.offset = 0;
620     Info.align = Align(4);
621     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
622                  MachineMemOperand::MOVolatile;
623     return true;
624   }
625 }
626 
627 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
628                                                 const AddrMode &AM, Type *Ty,
629                                                 unsigned AS,
630                                                 Instruction *I) const {
631   // No global is ever allowed as a base.
632   if (AM.BaseGV)
633     return false;
634 
635   // Require a 12-bit signed offset.
636   if (!isInt<12>(AM.BaseOffs))
637     return false;
638 
639   switch (AM.Scale) {
640   case 0: // "r+i" or just "i", depending on HasBaseReg.
641     break;
642   case 1:
643     if (!AM.HasBaseReg) // allow "r+i".
644       break;
645     return false; // disallow "r+r" or "r+r+i".
646   default:
647     return false;
648   }
649 
650   return true;
651 }
652 
653 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
654   return isInt<12>(Imm);
655 }
656 
657 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
658   return isInt<12>(Imm);
659 }
660 
661 // On RV32, 64-bit integers are split into their high and low parts and held
662 // in two different registers, so the trunc is free since the low register can
663 // just be used.
664 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
665   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
666     return false;
667   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
668   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
669   return (SrcBits == 64 && DestBits == 32);
670 }
671 
672 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
673   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
674       !SrcVT.isInteger() || !DstVT.isInteger())
675     return false;
676   unsigned SrcBits = SrcVT.getSizeInBits();
677   unsigned DestBits = DstVT.getSizeInBits();
678   return (SrcBits == 64 && DestBits == 32);
679 }
680 
681 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
682   // Zexts are free if they can be combined with a load.
683   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
684     EVT MemVT = LD->getMemoryVT();
685     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
686          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
687         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
688          LD->getExtensionType() == ISD::ZEXTLOAD))
689       return true;
690   }
691 
692   return TargetLowering::isZExtFree(Val, VT2);
693 }
694 
695 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
696   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
697 }
698 
699 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
700   return Subtarget.hasStdExtZbb();
701 }
702 
703 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
704   return Subtarget.hasStdExtZbb();
705 }
706 
707 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
708                                        bool ForCodeSize) const {
709   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
710     return false;
711   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
712     return false;
713   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
714     return false;
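  // Treat only +0.0 as a legal immediate: it can be materialised by moving the
  // zero register into an FPR. Everything else, including -0.0, is rejected.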
715   if (Imm.isNegZero())
716     return false;
717   return Imm.isZero();
718 }
719 
720 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
721   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
722          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
723          (VT == MVT::f64 && Subtarget.hasStdExtD());
724 }
725 
726 // Changes the condition code and swaps operands if necessary, so the SetCC
727 // operation matches one of the comparisons supported directly in the RISC-V
728 // ISA.
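// For example, (setcc a, b, setgt) becomes (setcc b, a, setlt), and SETLT then
// maps directly to BLT.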
729 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
730   switch (CC) {
731   default:
732     break;
733   case ISD::SETGT:
734   case ISD::SETLE:
735   case ISD::SETUGT:
736   case ISD::SETULE:
737     CC = ISD::getSetCCSwappedOperands(CC);
738     std::swap(LHS, RHS);
739     break;
740   }
741 }
742 
743 // Return the RISC-V branch opcode that matches the given DAG integer
744 // condition code. The CondCode must be one of those supported by the RISC-V
745 // ISA (see normaliseSetCC).
746 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
747   switch (CC) {
748   default:
749     llvm_unreachable("Unsupported CondCode");
750   case ISD::SETEQ:
751     return RISCV::BEQ;
752   case ISD::SETNE:
753     return RISCV::BNE;
754   case ISD::SETLT:
755     return RISCV::BLT;
756   case ISD::SETGE:
757     return RISCV::BGE;
758   case ISD::SETULT:
759     return RISCV::BLTU;
760   case ISD::SETUGE:
761     return RISCV::BGEU;
762   }
763 }
764 
765 // Return the largest legal scalable vector type that matches VT's element type.
766 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
767                                             const RISCVSubtarget &Subtarget) {
768   assert(VT.isFixedLengthVector() &&
769          DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
770          "Expected legal fixed length vector!");
771 
772   unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
773   assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");
774 
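  // Each container below has a known minimum size of LMul * 64 bits; for
  // example, with LMul == 2 an i32 element type maps to nxv4i32.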
775   switch (VT.getVectorElementType().SimpleTy) {
776   default:
777     llvm_unreachable("unexpected element type for RVV container");
778   case MVT::i8:
779     return MVT::getScalableVectorVT(MVT::i8, LMul * 8);
780   case MVT::i16:
781     return MVT::getScalableVectorVT(MVT::i16, LMul * 4);
782   case MVT::i32:
783     return MVT::getScalableVectorVT(MVT::i32, LMul * 2);
784   case MVT::i64:
785     return MVT::getScalableVectorVT(MVT::i64, LMul);
786   case MVT::f16:
787     return MVT::getScalableVectorVT(MVT::f16, LMul * 4);
788   case MVT::f32:
789     return MVT::getScalableVectorVT(MVT::f32, LMul * 2);
790   case MVT::f64:
791     return MVT::getScalableVectorVT(MVT::f64, LMul);
792   }
793 }
794 
795 // Grow V to consume an entire RVV register.
796 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
797                                        const RISCVSubtarget &Subtarget) {
798   assert(VT.isScalableVector() &&
799          "Expected to convert into a scalable vector!");
800   assert(V.getValueType().isFixedLengthVector() &&
801          "Expected a fixed length vector operand!");
802   SDLoc DL(V);
803   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
804   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
805 }
806 
807 // Shrink V so it's just big enough to maintain a VT's worth of data.
808 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
809                                          const RISCVSubtarget &Subtarget) {
810   assert(VT.isFixedLengthVector() &&
811          "Expected to convert into a fixed length vector!");
812   assert(V.getValueType().isScalableVector() &&
813          "Expected a scalable vector operand!");
814   SDLoc DL(V);
815   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
816   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
817 }
818 
819 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
820                                  const RISCVSubtarget &Subtarget) {
821   MVT VT = Op.getSimpleValueType();
822   assert(VT.isFixedLengthVector() && "Unexpected vector!");
823 
824   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
825     MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
826 
827     SDLoc DL(Op);
828     SDValue VL =
829         DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
830 
831     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
832                                         : RISCVISD::VMV_V_X_VL;
833     Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
834     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
835   }
836 
837   return SDValue();
838 }
839 
840 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
841                                             SelectionDAG &DAG) const {
842   switch (Op.getOpcode()) {
843   default:
844     report_fatal_error("unimplemented operand");
845   case ISD::GlobalAddress:
846     return lowerGlobalAddress(Op, DAG);
847   case ISD::BlockAddress:
848     return lowerBlockAddress(Op, DAG);
849   case ISD::ConstantPool:
850     return lowerConstantPool(Op, DAG);
851   case ISD::JumpTable:
852     return lowerJumpTable(Op, DAG);
853   case ISD::GlobalTLSAddress:
854     return lowerGlobalTLSAddress(Op, DAG);
855   case ISD::SELECT:
856     return lowerSELECT(Op, DAG);
857   case ISD::VASTART:
858     return lowerVASTART(Op, DAG);
859   case ISD::FRAMEADDR:
860     return lowerFRAMEADDR(Op, DAG);
861   case ISD::RETURNADDR:
862     return lowerRETURNADDR(Op, DAG);
863   case ISD::SHL_PARTS:
864     return lowerShiftLeftParts(Op, DAG);
865   case ISD::SRA_PARTS:
866     return lowerShiftRightParts(Op, DAG, true);
867   case ISD::SRL_PARTS:
868     return lowerShiftRightParts(Op, DAG, false);
869   case ISD::BITCAST: {
870     assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
871             Subtarget.hasStdExtZfh()) &&
872            "Unexpected custom legalisation");
873     SDLoc DL(Op);
874     SDValue Op0 = Op.getOperand(0);
875     if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
876       if (Op0.getValueType() != MVT::i16)
877         return SDValue();
878       SDValue NewOp0 =
879           DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
880       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
881       return FPConv;
882     } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
883                Subtarget.hasStdExtF()) {
884       if (Op0.getValueType() != MVT::i32)
885         return SDValue();
886       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
887       SDValue FPConv =
888           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
889       return FPConv;
890     }
891     return SDValue();
892   }
893   case ISD::INTRINSIC_WO_CHAIN:
894     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
895   case ISD::INTRINSIC_W_CHAIN:
896     return LowerINTRINSIC_W_CHAIN(Op, DAG);
897   case ISD::BSWAP:
898   case ISD::BITREVERSE: {
899     // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
900     assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
901     MVT VT = Op.getSimpleValueType();
902     SDLoc DL(Op);
903     // Start with the maximum immediate value which is the bitwidth - 1.
904     unsigned Imm = VT.getSizeInBits() - 1;
905     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
906     if (Op.getOpcode() == ISD::BSWAP)
907       Imm &= ~0x7U;
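    // For example, on RV64 BITREVERSE uses Imm == 63 while BSWAP uses
    // Imm == 56 (63 & ~0x7), i.e. the bytes are reversed but the bits within
    // each byte are not.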
908     return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
909                        DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
910   }
911   case ISD::FSHL:
912   case ISD::FSHR: {
913     MVT VT = Op.getSimpleValueType();
914     assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
915     SDLoc DL(Op);
916     // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
917     // use log2(XLen) bits. Mask the shift amount accordingly.
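    // For example, on RV64 the shift amount is masked with 63; on RV32, with 31.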
918     unsigned ShAmtWidth = Subtarget.getXLen() - 1;
919     SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
920                                 DAG.getConstant(ShAmtWidth, DL, VT));
921     unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
922     return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
923   }
924   case ISD::TRUNCATE: {
925     SDLoc DL(Op);
926     EVT VT = Op.getValueType();
927     // Only custom-lower vector truncates
928     if (!VT.isVector())
929       return Op;
930 
931     // Truncates to mask types are handled differently
932     if (VT.getVectorElementType() == MVT::i1)
933       return lowerVectorMaskTrunc(Op, DAG);
934 
935     // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
936     // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which
937     // truncate by one power of two at a time.
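    // For example, truncating nxv2i64 to nxv2i8 emits three such nodes:
    // nxv2i64 -> nxv2i32 -> nxv2i16 -> nxv2i8.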
938     EVT DstEltVT = VT.getVectorElementType();
939 
940     SDValue Src = Op.getOperand(0);
941     EVT SrcVT = Src.getValueType();
942     EVT SrcEltVT = SrcVT.getVectorElementType();
943 
944     assert(DstEltVT.bitsLT(SrcEltVT) &&
945            isPowerOf2_64(DstEltVT.getSizeInBits()) &&
946            isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
947            "Unexpected vector truncate lowering");
948 
949     SDValue Result = Src;
950     LLVMContext &Context = *DAG.getContext();
951     const ElementCount Count = SrcVT.getVectorElementCount();
952     do {
953       SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2);
954       EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
955       Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result);
956     } while (SrcEltVT != DstEltVT);
957 
958     return Result;
959   }
960   case ISD::ANY_EXTEND:
961   case ISD::ZERO_EXTEND:
962     return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
963   case ISD::SIGN_EXTEND:
964     return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
965   case ISD::SPLAT_VECTOR:
966     return lowerSPLATVECTOR(Op, DAG);
967   case ISD::INSERT_VECTOR_ELT:
968     return lowerINSERT_VECTOR_ELT(Op, DAG);
969   case ISD::EXTRACT_VECTOR_ELT:
970     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
971   case ISD::VSCALE: {
972     MVT VT = Op.getSimpleValueType();
973     SDLoc DL(Op);
974     SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
975     // We define our scalable vector types for LMUL=1 to use a 64-bit known
976     // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we
977     // calculate vscale as VLENB / 8.
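    // For example, on a machine with VLEN == 128 bits, VLENB is 16 and vscale
    // evaluates to 16 >> 3 == 2.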
978     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
979                                  DAG.getConstant(3, DL, VT));
980     return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
981   }
982   case ISD::FP_EXTEND: {
983     // RVV can only do fp_extend to types double the size as the source. We
984     // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
985     // via f32.
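    // For example, nxv2f16 -> nxv2f64 is lowered as nxv2f16 -> nxv2f32 followed
    // by nxv2f32 -> nxv2f64.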
986     MVT VT = Op.getSimpleValueType();
987     MVT SrcVT = Op.getOperand(0).getSimpleValueType();
988     // We only need to close the gap between vXf16->vXf64.
989     if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
990         SrcVT.getVectorElementType() != MVT::f16)
991       return Op;
992     SDLoc DL(Op);
993     MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
994     SDValue IntermediateRound =
995         DAG.getFPExtendOrRound(Op.getOperand(0), DL, InterVT);
996     return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
997   }
998   case ISD::FP_ROUND: {
999     // RVV can only do fp_round to types half the size as the source. We
1000     // custom-lower f64->f16 rounds via RVV's round-to-odd float
1001     // conversion instruction.
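    // For example, nxv2f64 -> nxv2f16 first narrows to nxv2f32 with the
    // round-to-odd conversion, then performs an ordinary f32 -> f16 round.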
1002     MVT VT = Op.getSimpleValueType();
1003     MVT SrcVT = Op.getOperand(0).getSimpleValueType();
1004     // We only need to close the gap between vXf64<->vXf16.
1005     if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
1006         SrcVT.getVectorElementType() != MVT::f64)
1007       return Op;
1008     SDLoc DL(Op);
1009     MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1010     SDValue IntermediateRound =
1011         DAG.getNode(RISCVISD::VFNCVT_ROD, DL, InterVT, Op.getOperand(0));
1012     return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
1013   }
1014   case ISD::FP_TO_SINT:
1015   case ISD::FP_TO_UINT:
1016   case ISD::SINT_TO_FP:
1017   case ISD::UINT_TO_FP: {
1018     // RVV can only do fp<->int conversions to types half/double the size as
1019     // the source. We custom-lower any conversions that do two hops into
1020     // sequences.
1021     MVT VT = Op.getSimpleValueType();
1022     if (!VT.isVector())
1023       return Op;
1024     SDLoc DL(Op);
1025     SDValue Src = Op.getOperand(0);
1026     MVT EltVT = VT.getVectorElementType();
1027     MVT SrcEltVT = Src.getSimpleValueType().getVectorElementType();
1028     unsigned EltSize = EltVT.getSizeInBits();
1029     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
1030     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
1031            "Unexpected vector element types");
1032     bool IsInt2FP = SrcEltVT.isInteger();
1033     // Widening conversions
1034     if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
1035       if (IsInt2FP) {
1036         // Do a regular integer sign/zero extension then convert to float.
1037         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
1038                                       VT.getVectorElementCount());
1039         unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
1040                                  ? ISD::ZERO_EXTEND
1041                                  : ISD::SIGN_EXTEND;
1042         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
1043         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
1044       }
1045       // FP2Int
1046       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
1047       // Do one doubling fp_extend then complete the operation by converting
1048       // to int.
1049       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1050       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
1051       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
1052     }
1053 
1054     // Narrowing conversions
1055     if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
1056       if (IsInt2FP) {
1057         // One narrowing int_to_fp, then an fp_round.
1058         assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
1059         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1060         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
1061         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
1062       }
1063       // FP2Int
1064       // One narrowing fp_to_int, then truncate the integer. If the float isn't
1065       // representable by the integer, the result is poison.
1066       MVT IVecVT =
1067           MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
1068                            VT.getVectorElementCount());
1069       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
1070       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
1071     }
1072 
1073     return Op;
1074   }
1075   case ISD::VECREDUCE_ADD:
1076   case ISD::VECREDUCE_UMAX:
1077   case ISD::VECREDUCE_SMAX:
1078   case ISD::VECREDUCE_UMIN:
1079   case ISD::VECREDUCE_SMIN:
1080   case ISD::VECREDUCE_AND:
1081   case ISD::VECREDUCE_OR:
1082   case ISD::VECREDUCE_XOR:
1083     return lowerVECREDUCE(Op, DAG);
1084   case ISD::VECREDUCE_FADD:
1085   case ISD::VECREDUCE_SEQ_FADD:
1086     return lowerFPVECREDUCE(Op, DAG);
1087   case ISD::BUILD_VECTOR:
1088     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
1089   case ISD::LOAD:
1090     return lowerFixedLengthVectorLoadToRVV(Op, DAG);
1091   case ISD::STORE:
1092     return lowerFixedLengthVectorStoreToRVV(Op, DAG);
1093   case ISD::ADD:
1094     return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
1095   case ISD::SUB:
1096     return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
1097   case ISD::MUL:
1098     return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
1099   case ISD::AND:
1100     return lowerToScalableOp(Op, DAG, RISCVISD::AND_VL);
1101   case ISD::OR:
1102     return lowerToScalableOp(Op, DAG, RISCVISD::OR_VL);
1103   case ISD::XOR:
1104     return lowerToScalableOp(Op, DAG, RISCVISD::XOR_VL);
1105   case ISD::SDIV:
1106     return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
1107   case ISD::SREM:
1108     return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
1109   case ISD::UDIV:
1110     return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
1111   case ISD::UREM:
1112     return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
1113   case ISD::SHL:
1114     return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
1115   case ISD::SRA:
1116     return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
1117   case ISD::SRL:
1118     return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
1119   case ISD::FADD:
1120     return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
1121   case ISD::FSUB:
1122     return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
1123   case ISD::FMUL:
1124     return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
1125   case ISD::FDIV:
1126     return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
1127   case ISD::FNEG:
1128     return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
1129   case ISD::FMA:
1130     return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
1131   }
1132 }
1133 
1134 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
1135                              SelectionDAG &DAG, unsigned Flags) {
1136   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1137 }
1138 
1139 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
1140                              SelectionDAG &DAG, unsigned Flags) {
1141   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1142                                    Flags);
1143 }
1144 
1145 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
1146                              SelectionDAG &DAG, unsigned Flags) {
1147   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1148                                    N->getOffset(), Flags);
1149 }
1150 
1151 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
1152                              SelectionDAG &DAG, unsigned Flags) {
1153   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1154 }
1155 
1156 template <class NodeTy>
1157 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1158                                      bool IsLocal) const {
1159   SDLoc DL(N);
1160   EVT Ty = getPointerTy(DAG.getDataLayout());
1161 
1162   if (isPositionIndependent()) {
1163     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1164     if (IsLocal)
1165       // Use PC-relative addressing to access the symbol. This generates the
1166       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
1167       // %pcrel_lo(auipc)).
1168       return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
1169 
1170     // Use PC-relative addressing to access the GOT for this symbol, then load
1171     // the address from the GOT. This generates the pattern (PseudoLA sym),
1172     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
1173     return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
1174   }
1175 
1176   switch (getTargetMachine().getCodeModel()) {
1177   default:
1178     report_fatal_error("Unsupported code model for lowering");
1179   case CodeModel::Small: {
1180     // Generate a sequence for accessing addresses within the first 2 GiB of
1181     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
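    // In assembly this is, for example (register choice is illustrative):
    //   lui  a0, %hi(sym)
    //   addi a0, a0, %lo(sym)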
1182     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
1183     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
1184     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
1185     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
1186   }
1187   case CodeModel::Medium: {
1188     // Generate a sequence for accessing addresses within any 2 GiB range of
1189     // the address space. This generates the pattern (PseudoLLA sym), which
1190     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
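    // In assembly this is, for example (register and label names are
    // illustrative):
    //   .Lpcrel_hi0: auipc a0, %pcrel_hi(sym)
    //                addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)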
1191     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1192     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
1193   }
1194   }
1195 }
1196 
1197 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
1198                                                 SelectionDAG &DAG) const {
1199   SDLoc DL(Op);
1200   EVT Ty = Op.getValueType();
1201   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1202   int64_t Offset = N->getOffset();
1203   MVT XLenVT = Subtarget.getXLenVT();
1204 
1205   const GlobalValue *GV = N->getGlobal();
1206   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
1207   SDValue Addr = getAddr(N, DAG, IsLocal);
1208 
1209   // In order to maximise the opportunity for common subexpression elimination,
1210   // emit a separate ADD node for the global address offset instead of folding
1211   // it in the global address node. Later peephole optimisations may choose to
1212   // fold it back in when profitable.
1213   if (Offset != 0)
1214     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
1215                        DAG.getConstant(Offset, DL, XLenVT));
1216   return Addr;
1217 }
1218 
1219 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
1220                                                SelectionDAG &DAG) const {
1221   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
1222 
1223   return getAddr(N, DAG);
1224 }
1225 
1226 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
1227                                                SelectionDAG &DAG) const {
1228   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
1229 
1230   return getAddr(N, DAG);
1231 }
1232 
1233 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
1234                                             SelectionDAG &DAG) const {
1235   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
1236 
1237   return getAddr(N, DAG);
1238 }
1239 
1240 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1241                                               SelectionDAG &DAG,
1242                                               bool UseGOT) const {
1243   SDLoc DL(N);
1244   EVT Ty = getPointerTy(DAG.getDataLayout());
1245   const GlobalValue *GV = N->getGlobal();
1246   MVT XLenVT = Subtarget.getXLenVT();
1247 
1248   if (UseGOT) {
1249     // Use PC-relative addressing to access the GOT for this TLS symbol, then
1250     // load the address from the GOT and add the thread pointer. This generates
1251     // the pattern (PseudoLA_TLS_IE sym), which expands to
1252     // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
1253     SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1254     SDValue Load =
1255         SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
1256 
1257     // Add the thread pointer.
1258     SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
1259     return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
1260   }
1261 
1262   // Generate a sequence for accessing the address relative to the thread
1263   // pointer, with the appropriate adjustment for the thread pointer offset.
1264   // This generates the pattern
1265   // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
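  // In assembly this is, for example (register choice is illustrative):
  //   lui  a0, %tprel_hi(sym)
  //   add  a0, a0, tp, %tprel_add(sym)
  //   addi a0, a0, %tprel_lo(sym)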
1266   SDValue AddrHi =
1267       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
1268   SDValue AddrAdd =
1269       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
1270   SDValue AddrLo =
1271       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
1272 
1273   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
1274   SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
1275   SDValue MNAdd = SDValue(
1276       DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
1277       0);
1278   return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
1279 }
1280 
1281 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1282                                                SelectionDAG &DAG) const {
1283   SDLoc DL(N);
1284   EVT Ty = getPointerTy(DAG.getDataLayout());
1285   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1286   const GlobalValue *GV = N->getGlobal();
1287 
1288   // Use a PC-relative addressing mode to access the global dynamic GOT address.
1289   // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
1290   // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
1291   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1292   SDValue Load =
1293       SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
1294 
1295   // Prepare argument list to generate call.
1296   ArgListTy Args;
1297   ArgListEntry Entry;
1298   Entry.Node = Load;
1299   Entry.Ty = CallTy;
1300   Args.push_back(Entry);
1301 
1302   // Setup call to __tls_get_addr.
1303   TargetLowering::CallLoweringInfo CLI(DAG);
1304   CLI.setDebugLoc(DL)
1305       .setChain(DAG.getEntryNode())
1306       .setLibCallee(CallingConv::C, CallTy,
1307                     DAG.getExternalSymbol("__tls_get_addr", Ty),
1308                     std::move(Args));
1309 
1310   return LowerCallTo(CLI).first;
1311 }
1312 
1313 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1314                                                    SelectionDAG &DAG) const {
1315   SDLoc DL(Op);
1316   EVT Ty = Op.getValueType();
1317   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1318   int64_t Offset = N->getOffset();
1319   MVT XLenVT = Subtarget.getXLenVT();
1320 
1321   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
1322 
1323   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1324       CallingConv::GHC)
1325     report_fatal_error("In GHC calling convention TLS is not supported");
1326 
1327   SDValue Addr;
1328   switch (Model) {
1329   case TLSModel::LocalExec:
1330     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
1331     break;
1332   case TLSModel::InitialExec:
1333     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
1334     break;
1335   case TLSModel::LocalDynamic:
1336   case TLSModel::GeneralDynamic:
1337     Addr = getDynamicTLSAddr(N, DAG);
1338     break;
1339   }
1340 
1341   // In order to maximise the opportunity for common subexpression elimination,
1342   // emit a separate ADD node for the global address offset instead of folding
  // it into the global address node. Later peephole optimisations may choose to

1344   // fold it back in when profitable.
1345   if (Offset != 0)
1346     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
1347                        DAG.getConstant(Offset, DL, XLenVT));
1348   return Addr;
1349 }
1350 
1351 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
1352   SDValue CondV = Op.getOperand(0);
1353   SDValue TrueV = Op.getOperand(1);
1354   SDValue FalseV = Op.getOperand(2);
1355   SDLoc DL(Op);
1356   MVT XLenVT = Subtarget.getXLenVT();
1357 
1358   // If the result type is XLenVT and CondV is the output of a SETCC node
1359   // which also operated on XLenVT inputs, then merge the SETCC node into the
1360   // lowered RISCVISD::SELECT_CC to take advantage of the integer
1361   // compare+branch instructions. i.e.:
1362   // (select (setcc lhs, rhs, cc), truev, falsev)
1363   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
1364   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
1365       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
1366     SDValue LHS = CondV.getOperand(0);
1367     SDValue RHS = CondV.getOperand(1);
1368     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
1369     ISD::CondCode CCVal = CC->get();
1370 
1371     normaliseSetCC(LHS, RHS, CCVal);
1372 
1373     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
1374     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1375     return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
1376   }
1377 
1378   // Otherwise:
1379   // (select condv, truev, falsev)
1380   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
1381   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
1382   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
1383 
1384   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1385 
1386   return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
1387 }
1388 
1389 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1390   MachineFunction &MF = DAG.getMachineFunction();
1391   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
1392 
1393   SDLoc DL(Op);
1394   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1395                                  getPointerTy(MF.getDataLayout()));
1396 
1397   // vastart just stores the address of the VarArgsFrameIndex slot into the
1398   // memory location argument.
1399   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1400   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1401                       MachinePointerInfo(SV));
1402 }
1403 
1404 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
1405                                             SelectionDAG &DAG) const {
1406   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
1407   MachineFunction &MF = DAG.getMachineFunction();
1408   MachineFrameInfo &MFI = MF.getFrameInfo();
1409   MFI.setFrameAddressIsTaken(true);
1410   Register FrameReg = RI.getFrameRegister(MF);
1411   int XLenInBytes = Subtarget.getXLen() / 8;
1412 
1413   EVT VT = Op.getValueType();
1414   SDLoc DL(Op);
1415   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1416   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1417   while (Depth--) {
1418     int Offset = -(XLenInBytes * 2);
1419     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1420                               DAG.getIntPtrConstant(Offset, DL));
1421     FrameAddr =
1422         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1423   }
1424   return FrameAddr;
1425 }
1426 
1427 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
1428                                              SelectionDAG &DAG) const {
1429   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
1430   MachineFunction &MF = DAG.getMachineFunction();
1431   MachineFrameInfo &MFI = MF.getFrameInfo();
1432   MFI.setReturnAddressIsTaken(true);
1433   MVT XLenVT = Subtarget.getXLenVT();
1434   int XLenInBytes = Subtarget.getXLen() / 8;
1435 
1436   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1437     return SDValue();
1438 
1439   EVT VT = Op.getValueType();
1440   SDLoc DL(Op);
1441   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1442   if (Depth) {
1443     int Off = -XLenInBytes;
1444     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
1445     SDValue Offset = DAG.getConstant(Off, DL, VT);
1446     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1447                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1448                        MachinePointerInfo());
1449   }
1450 
1451   // Return the value of the return address register, marking it an implicit
1452   // live-in.
1453   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
1454   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
1455 }
1456 
1457 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
1458                                                  SelectionDAG &DAG) const {
1459   SDLoc DL(Op);
1460   SDValue Lo = Op.getOperand(0);
1461   SDValue Hi = Op.getOperand(1);
1462   SDValue Shamt = Op.getOperand(2);
1463   EVT VT = Lo.getValueType();
1464 
1465   // if Shamt-XLEN < 0: // Shamt < XLEN
1466   //   Lo = Lo << Shamt
1467   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
1468   // else:
1469   //   Lo = 0
1470   //   Hi = Lo << (Shamt-XLEN)
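  //
  // Worked example (illustrative only), assuming XLEN=32:
  //   Shamt = 4:  Lo = Lo << 4, Hi = (Hi << 4) | ((Lo >> 1) >> 27)
  //                                = (Hi << 4) | (Lo >> 28)
  //   Shamt = 40: Lo = 0,       Hi = Lo << 8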
1471 
1472   SDValue Zero = DAG.getConstant(0, DL, VT);
1473   SDValue One = DAG.getConstant(1, DL, VT);
1474   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
1475   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
1476   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
1477   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
1478 
1479   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
1480   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
1481   SDValue ShiftRightLo =
1482       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
1483   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
1484   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
1485   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
1486 
1487   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
1488 
1489   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
1490   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1491 
1492   SDValue Parts[2] = {Lo, Hi};
1493   return DAG.getMergeValues(Parts, DL);
1494 }
1495 
1496 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
1497                                                   bool IsSRA) const {
1498   SDLoc DL(Op);
1499   SDValue Lo = Op.getOperand(0);
1500   SDValue Hi = Op.getOperand(1);
1501   SDValue Shamt = Op.getOperand(2);
1502   EVT VT = Lo.getValueType();
1503 
1504   // SRA expansion:
1505   //   if Shamt-XLEN < 0: // Shamt < XLEN
1506   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
1507   //     Hi = Hi >>s Shamt
1508   //   else:
1509   //     Lo = Hi >>s (Shamt-XLEN);
1510   //     Hi = Hi >>s (XLEN-1)
1511   //
1512   // SRL expansion:
1513   //   if Shamt-XLEN < 0: // Shamt < XLEN
1514   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
1515   //     Hi = Hi >>u Shamt
1516   //   else:
1517   //     Lo = Hi >>u (Shamt-XLEN);
1518   //     Hi = 0;
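  //
  // Worked example (illustrative only), assuming XLEN=32 and an SRL expansion:
  //   Shamt = 4:  Lo = (Lo >> 4) | ((Hi << 1) << 27) = (Lo >> 4) | (Hi << 28)
  //               Hi = Hi >> 4
  //   Shamt = 40: Lo = Hi >> 8, Hi = 0 (or Hi >> 31 for SRA)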
1519 
1520   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
1521 
1522   SDValue Zero = DAG.getConstant(0, DL, VT);
1523   SDValue One = DAG.getConstant(1, DL, VT);
1524   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
1525   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
1526   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
1527   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
1528 
1529   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
1530   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
1531   SDValue ShiftLeftHi =
1532       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
1533   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
1534   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
1535   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
1536   SDValue HiFalse =
1537       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
1538 
1539   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
1540 
1541   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
1542   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1543 
1544   SDValue Parts[2] = {Lo, Hi};
1545   return DAG.getMergeValues(Parts, DL);
1546 }
1547 
1548 // Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is
1549 // illegal (currently only vXi64 RV32).
1550 // FIXME: We could also catch non-constant sign-extended i32 values and lower
1551 // them to SPLAT_VECTOR_I64
1552 SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op,
1553                                               SelectionDAG &DAG) const {
1554   SDLoc DL(Op);
1555   EVT VecVT = Op.getValueType();
1556   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
1557          "Unexpected SPLAT_VECTOR lowering");
1558   SDValue SplatVal = Op.getOperand(0);
1559 
1560   // If we can prove that the value is a sign-extended 32-bit value, lower this
1561   // as a custom node in order to try and match RVV vector/scalar instructions.
1562   if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) {
1563     if (isInt<32>(CVal->getSExtValue()))
1564       return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
1565                          DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32));
1566   }
1567 
1568   if (SplatVal.getOpcode() == ISD::SIGN_EXTEND &&
1569       SplatVal.getOperand(0).getValueType() == MVT::i32) {
1570     return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
1571                        SplatVal.getOperand(0));
1572   }
1573 
1574   // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
1575   // to accidentally sign-extend the 32-bit halves to the e64 SEW:
1576   // vmv.v.x vX, hi
1577   // vsll.vx vX, vX, /*32*/
1578   // vmv.v.x vY, lo
1579   // vsll.vx vY, vY, /*32*/
1580   // vsrl.vx vY, vY, /*32*/
1581   // vor.vv vX, vX, vY
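  //
  // For illustration: if the low half were 0x80000000, splatting it directly
  // at SEW=64 would sign-extend it to 0xFFFFFFFF80000000; the vsll.vx/vsrl.vx
  // pair by 32 zero-extends it instead, so the final vor.vv yields the
  // intended 64-bit element.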
1582   SDValue One = DAG.getConstant(1, DL, MVT::i32);
1583   SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
1584   SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
1585   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero);
1586   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One);
1587 
1588   Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
1589   Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
1590   Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);
1591 
1592   if (isNullConstant(Hi))
1593     return Lo;
1594 
1595   Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
1596   Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);
1597 
1598   return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
1599 }
1600 
1601 // Custom-lower extensions from mask vectors by using a vselect either with 1
1602 // for zero/any-extension or -1 for sign-extension:
1603 //   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
1604 // Note that any-extension is lowered identically to zero-extension.
1605 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
1606                                                 int64_t ExtTrueVal) const {
1607   SDLoc DL(Op);
1608   EVT VecVT = Op.getValueType();
1609   SDValue Src = Op.getOperand(0);
1610   // Only custom-lower extensions from mask types
1611   if (!Src.getValueType().isVector() ||
1612       Src.getValueType().getVectorElementType() != MVT::i1)
1613     return Op;
1614 
1615   // Be careful not to introduce illegal scalar types at this stage, and be
1616   // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
1617   // illegal and must be expanded. Since we know that the constants are
1618   // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
1619   bool IsRV32E64 =
1620       !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
1621   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1622   SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, Subtarget.getXLenVT());
1623 
1624   if (!IsRV32E64) {
1625     SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
1626     SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
1627   } else {
1628     SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
1629     SplatTrueVal =
1630         DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
1631   }
1632 
1633   return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
1634 }
1635 
1636 // Custom-lower truncations from vectors to mask vectors by using a mask and a
1637 // setcc operation:
1638 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
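// For example (illustrative only), truncating <2 x i8> <2, 3> to <2 x i1>
// should give <0, 1>: only bit 0 of each element matters, and
// (setcc (and vec, 1), 0, ne) reproduces exactly that.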
1639 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
1640                                                   SelectionDAG &DAG) const {
1641   SDLoc DL(Op);
1642   EVT MaskVT = Op.getValueType();
1643   // Only expect to custom-lower truncations to mask types
1644   assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
1645          "Unexpected type for vector mask lowering");
1646   SDValue Src = Op.getOperand(0);
1647   EVT VecVT = Src.getValueType();
1648 
1649   // Be careful not to introduce illegal scalar types at this stage, and be
1650   // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
1651   // illegal and must be expanded. Since we know that the constants are
1652   // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
1653   bool IsRV32E64 =
1654       !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
1655   SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
1656   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1657 
1658   if (!IsRV32E64) {
1659     SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne);
1660     SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
1661   } else {
1662     SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne);
1663     SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
1664   }
1665 
1666   SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
1667 
1668   return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
1669 }
1670 
1671 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1672                                                     SelectionDAG &DAG) const {
1673   SDLoc DL(Op);
1674   EVT VecVT = Op.getValueType();
1675   SDValue Vec = Op.getOperand(0);
1676   SDValue Val = Op.getOperand(1);
1677   SDValue Idx = Op.getOperand(2);
1678 
1679   // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is
1680   // first slid down into position, the value is inserted into the first
1681   // position, and the vector is slid back up. We do this to simplify patterns.
1682   //   (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx),
1683   if (Subtarget.is64Bit() || VecVT.getVectorElementType() != MVT::i64) {
1684     if (isNullConstant(Idx))
1685       return Op;
1686     SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
1687                                     DAG.getUNDEF(VecVT), Vec, Idx);
1688     SDValue InsertElt0 =
1689         DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val,
1690                     DAG.getConstant(0, DL, Subtarget.getXLenVT()));
1691 
1692     return DAG.getNode(RISCVISD::VSLIDEUP, DL, VecVT, Vec, InsertElt0, Idx);
1693   }
1694 
1695   // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type
1696   // is illegal (currently only vXi64 RV32).
1697   // Since there is no easy way of getting a single element into a vector when
1698   // XLEN<SEW, we lower the operation to the following sequence:
1699   //   splat      vVal, rVal
1700   //   vid.v      vVid
1701   //   vmseq.vx   mMask, vVid, rIdx
1702   //   vmerge.vvm vDest, vSrc, vVal, mMask
1703   // This essentially merges the original vector with the inserted element by
1704   // using a mask whose only set bit is that corresponding to the insert
1705   // index.
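  // Worked example (illustrative only): inserting Val at index 2 of a
  // 4-element vector gives vVid = {0,1,2,3}, so vmseq.vx against 2 produces
  // the mask {0,0,1,0} and the vmerge takes the splatted Val only in lane 2.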
1706   SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val);
1707   SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx);
1708 
1709   SDValue VID = DAG.getNode(RISCVISD::VID, DL, VecVT);
1710   auto SetCCVT =
1711       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT);
1712   SDValue Mask = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ);
1713 
1714   return DAG.getNode(ISD::VSELECT, DL, VecVT, Mask, SplattedVal, Vec);
1715 }
1716 
1717 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
1718 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
1719 // types this is done using VMV_X_S to allow us to glean information about the
1720 // sign bits of the result.
1721 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1722                                                      SelectionDAG &DAG) const {
1723   SDLoc DL(Op);
1724   SDValue Idx = Op.getOperand(1);
1725   SDValue Vec = Op.getOperand(0);
1726   EVT EltVT = Op.getValueType();
1727   EVT VecVT = Vec.getValueType();
1728   MVT XLenVT = Subtarget.getXLenVT();
1729 
1730   // If the index is 0, the vector is already in the right position.
1731   if (!isNullConstant(Idx)) {
1732     Vec = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, DAG.getUNDEF(VecVT), Vec,
1733                       Idx);
1734   }
1735 
1736   if (!EltVT.isInteger()) {
1737     // Floating-point extracts are handled in TableGen.
1738     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
1739                        DAG.getConstant(0, DL, XLenVT));
1740   }
1741 
1742   SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
1743   return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
1744 }
1745 
1746 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1747                                                      SelectionDAG &DAG) const {
1748   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1749   SDLoc DL(Op);
1750 
1751   if (Subtarget.hasStdExtV()) {
1752     // Some RVV intrinsics may claim that they want an integer operand to be
1753     // extended.
1754     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1755             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1756       if (II->ExtendedOperand) {
1757         assert(II->ExtendedOperand < Op.getNumOperands());
1758         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1759         SDValue &ScalarOp = Operands[II->ExtendedOperand];
1760         EVT OpVT = ScalarOp.getValueType();
1761         if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
1762             (OpVT == MVT::i32 && Subtarget.is64Bit())) {
1763           // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
1766           // FIXME: Should we ignore the upper bits in isel instead?
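          // For example (illustrative only): an i8 constant -1 any-extended
          // via zero extension would appear as 255 and fail the simm5 check
          // (range [-16, 15]), whereas sign extension keeps it as -1 and a
          // .vi form remains selectable.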
1767           unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
1768                                                           : ISD::ANY_EXTEND;
1769           ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
1770           return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
1771                              Operands);
1772         }
1773       }
1774     }
1775   }
1776 
1777   switch (IntNo) {
1778   default:
1779     return SDValue();    // Don't custom lower most intrinsics.
1780   case Intrinsic::thread_pointer: {
1781     EVT PtrVT = getPointerTy(DAG.getDataLayout());
1782     return DAG.getRegister(RISCV::X4, PtrVT);
1783   }
1784   case Intrinsic::riscv_vmv_x_s:
1785     assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
1786     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
1787                        Op.getOperand(1));
1788   case Intrinsic::riscv_vmv_v_x: {
1789     SDValue Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(),
1790                                  Op.getOperand(1));
1791     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
1792                        Scalar, Op.getOperand(2));
1793   }
1794   case Intrinsic::riscv_vfmv_v_f:
1795     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
1796                        Op.getOperand(1), Op.getOperand(2));
1797   }
1798 }
1799 
1800 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
1801                                                     SelectionDAG &DAG) const {
1802   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1803   SDLoc DL(Op);
1804 
1805   if (Subtarget.hasStdExtV()) {
1806     // Some RVV intrinsics may claim that they want an integer operand to be
1807     // extended.
1808     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1809             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1810       if (II->ExtendedOperand) {
1811         // The operands start from the second argument in INTRINSIC_W_CHAIN.
1812         unsigned ExtendOp = II->ExtendedOperand + 1;
1813         assert(ExtendOp < Op.getNumOperands());
1814         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1815         SDValue &ScalarOp = Operands[ExtendOp];
1816         EVT OpVT = ScalarOp.getValueType();
1817         if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
1818             (OpVT == MVT::i32 && Subtarget.is64Bit())) {
1819           // If the operand is a constant, sign extend to increase our chances
          // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
1822           // FIXME: Should we ignore the upper bits in isel instead?
1823           unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
1824                                                           : ISD::ANY_EXTEND;
1825           ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
1826           return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
1827                              Operands);
1828         }
1829       }
1830     }
1831   }
1832 
1833   switch (IntNo) {
1834   default:
1835     return SDValue(); // Don't custom lower most intrinsics.
1836   case Intrinsic::riscv_vleff: {
1837     SDLoc DL(Op);
1838     SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
1839     SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0),
1840                                Op.getOperand(2), Op.getOperand(3));
1841     SDValue ReadVL =
1842         SDValue(DAG.getMachineNode(RISCV::PseudoReadVL, DL, Op->getValueType(1),
1843                                    Load.getValue(2)),
1844                 0);
1845     return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
1846   }
1847   case Intrinsic::riscv_vleff_mask: {
1848     SDLoc DL(Op);
1849     SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
1850     SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0),
1851                                Op.getOperand(2), Op.getOperand(3),
1852                                Op.getOperand(4), Op.getOperand(5));
1853     SDValue ReadVL =
1854         SDValue(DAG.getMachineNode(RISCV::PseudoReadVL, DL, Op->getValueType(1),
1855                                    Load.getValue(2)),
1856                 0);
1857     return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
1858   }
1859   }
1860 }
1861 
1862 static std::pair<unsigned, uint64_t>
1863 getRVVReductionOpAndIdentityVal(unsigned ISDOpcode, unsigned EltSizeBits) {
1864   switch (ISDOpcode) {
1865   default:
1866     llvm_unreachable("Unhandled reduction");
1867   case ISD::VECREDUCE_ADD:
1868     return {RISCVISD::VECREDUCE_ADD, 0};
1869   case ISD::VECREDUCE_UMAX:
1870     return {RISCVISD::VECREDUCE_UMAX, 0};
1871   case ISD::VECREDUCE_SMAX:
1872     return {RISCVISD::VECREDUCE_SMAX, minIntN(EltSizeBits)};
1873   case ISD::VECREDUCE_UMIN:
1874     return {RISCVISD::VECREDUCE_UMIN, maxUIntN(EltSizeBits)};
1875   case ISD::VECREDUCE_SMIN:
1876     return {RISCVISD::VECREDUCE_SMIN, maxIntN(EltSizeBits)};
1877   case ISD::VECREDUCE_AND:
1878     return {RISCVISD::VECREDUCE_AND, -1};
1879   case ISD::VECREDUCE_OR:
1880     return {RISCVISD::VECREDUCE_OR, 0};
1881   case ISD::VECREDUCE_XOR:
1882     return {RISCVISD::VECREDUCE_XOR, 0};
1883   }
1884 }
1885 
1886 // Take a (supported) standard ISD reduction opcode and transform it to a RISCV
1887 // reduction opcode. Note that this returns a vector type, which must be
1888 // further processed to access the scalar result in element 0.
1889 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
1890                                             SelectionDAG &DAG) const {
1891   SDLoc DL(Op);
1892   assert(Op.getValueType().isSimple() &&
1893          Op.getOperand(0).getValueType().isSimple() &&
1894          "Unexpected vector-reduce lowering");
1895   MVT VecEltVT = Op.getOperand(0).getSimpleValueType().getVectorElementType();
1896   unsigned RVVOpcode;
1897   uint64_t IdentityVal;
1898   std::tie(RVVOpcode, IdentityVal) =
1899       getRVVReductionOpAndIdentityVal(Op.getOpcode(), VecEltVT.getSizeInBits());
1900   // We have to perform a bit of a dance to get from our vector type to the
  // correct LMUL=1 vector type. We divide our minimum VLEN (64) by the size in
  // bits of the vector element type to find the type which fills a single
  // register. Be careful to
1903   // use the operand's vector element type rather than the reduction's value
1904   // type, as that has likely been extended to XLEN.
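  // For example (illustrative only): for an nxv4i32 operand, VecEltVT is i32,
  // so NumElts = 64 / 32 = 2 and the LMUL=1 type used for the reduction is
  // nxv2i32; element 0 of that result is then extracted below.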
1905   unsigned NumElts = 64 / VecEltVT.getSizeInBits();
1906   MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts);
1907   SDValue IdentitySplat =
1908       DAG.getSplatVector(M1VT, DL, DAG.getConstant(IdentityVal, DL, VecEltVT));
1909   SDValue Reduction =
1910       DAG.getNode(RVVOpcode, DL, M1VT, Op.getOperand(0), IdentitySplat);
1911   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
1912                              DAG.getConstant(0, DL, Subtarget.getXLenVT()));
1913   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
1914 }
1915 
1916 // Given a reduction op, this function returns the matching reduction opcode,
1917 // the vector SDValue and the scalar SDValue required to lower this to a
1918 // RISCVISD node.
1919 static std::tuple<unsigned, SDValue, SDValue>
1920 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
1921   SDLoc DL(Op);
1922   switch (Op.getOpcode()) {
1923   default:
1924     llvm_unreachable("Unhandled reduction");
1925   case ISD::VECREDUCE_FADD:
1926     return std::make_tuple(RISCVISD::VECREDUCE_FADD, Op.getOperand(0),
1927                            DAG.getConstantFP(0.0, DL, EltVT));
1928   case ISD::VECREDUCE_SEQ_FADD:
1929     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD, Op.getOperand(1),
1930                            Op.getOperand(0));
1931   }
1932 }
1933 
1934 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
1935                                               SelectionDAG &DAG) const {
1936   SDLoc DL(Op);
1937   MVT VecEltVT = Op.getSimpleValueType();
1938   // We have to perform a bit of a dance to get from our vector type to the
1939   // correct LMUL=1 vector type. See above for an explanation.
1940   unsigned NumElts = 64 / VecEltVT.getSizeInBits();
1941   MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts);
1942 
1943   unsigned RVVOpcode;
1944   SDValue VectorVal, ScalarVal;
1945   std::tie(RVVOpcode, VectorVal, ScalarVal) =
1946       getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
1947 
1948   SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
1949   SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat);
1950   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
1951                      DAG.getConstant(0, DL, Subtarget.getXLenVT()));
1952 }
1953 
1954 SDValue
1955 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
1956                                                      SelectionDAG &DAG) const {
1957   auto *Load = cast<LoadSDNode>(Op);
1958 
1959   SDLoc DL(Op);
1960   MVT VT = Op.getSimpleValueType();
1961   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1962 
1963   SDValue VL =
1964       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
1965 
1966   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
1967   SDValue NewLoad = DAG.getMemIntrinsicNode(
1968       RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
1969       Load->getMemoryVT(), Load->getMemOperand());
1970 
1971   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
1972   return DAG.getMergeValues({Result, Load->getChain()}, DL);
1973 }
1974 
1975 SDValue
1976 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
1977                                                       SelectionDAG &DAG) const {
1978   auto *Store = cast<StoreSDNode>(Op);
1979 
1980   SDLoc DL(Op);
1981   MVT VT = Store->getValue().getSimpleValueType();
1982   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1983 
1984   SDValue VL =
1985       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
1986 
1987   SDValue NewValue =
1988       convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget);
1989   return DAG.getMemIntrinsicNode(
1990       RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
1991       {Store->getChain(), NewValue, Store->getBasePtr(), VL},
1992       Store->getMemoryVT(), Store->getMemOperand());
1993 }
1994 
1995 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
1996                                                unsigned NewOpc) const {
1997   MVT VT = Op.getSimpleValueType();
1998   assert(useRVVForFixedLengthVectorVT(VT) &&
1999          "Only expected to lower fixed length vector operation!");
2000   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2001 
2002   // Create list of operands by converting existing ones to scalable types.
2003   SmallVector<SDValue, 6> Ops;
2004   for (const SDValue &V : Op->op_values()) {
2005     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
2006 
2007     // Pass through non-vector operands.
2008     if (!V.getValueType().isVector()) {
2009       Ops.push_back(V);
2010       continue;
2011     }
2012 
2013     // "cast" fixed length vector to a scalable vector.
2014     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
2015            "Only fixed length vectors are supported!");
2016     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
2017   }
2018 
2019   SDLoc DL(Op);
2020   SDValue VL =
2021       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
2022   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2023   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2024   Ops.push_back(Mask);
2025   Ops.push_back(VL);
2026 
2027   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
2028   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
2029 }
2030 
2031 // Returns the opcode of the target-specific SDNode that implements the 32-bit
2032 // form of the given Opcode.
2033 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
2034   switch (Opcode) {
2035   default:
2036     llvm_unreachable("Unexpected opcode");
2037   case ISD::SHL:
2038     return RISCVISD::SLLW;
2039   case ISD::SRA:
2040     return RISCVISD::SRAW;
2041   case ISD::SRL:
2042     return RISCVISD::SRLW;
2043   case ISD::SDIV:
2044     return RISCVISD::DIVW;
2045   case ISD::UDIV:
2046     return RISCVISD::DIVUW;
2047   case ISD::UREM:
2048     return RISCVISD::REMUW;
2049   case ISD::ROTL:
2050     return RISCVISD::ROLW;
2051   case ISD::ROTR:
2052     return RISCVISD::RORW;
2053   case RISCVISD::GREVI:
2054     return RISCVISD::GREVIW;
2055   case RISCVISD::GORCI:
2056     return RISCVISD::GORCIW;
2057   }
2058 }
2059 
2060 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
2061 // Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// instructions later because the fact that the operation was originally of
// type i32 is lost.
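// For example (illustrative only), an i32 SRA on RV64 is rewritten as
// (trunc (SRAW (anyext lhs), (anyext rhs))) so that isel can pick sraw rather
// than a 64-bit shift plus extra sign-extension fix-ups.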
2065 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
2066                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
2067   SDLoc DL(N);
2068   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
2069   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2070   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2071   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2072   // ReplaceNodeResults requires we maintain the same type for the return value.
2073   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2074 }
2075 
// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics in order to reduce the number of sign-extension instructions.
2078 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2079   SDLoc DL(N);
2080   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2081   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2082   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2083   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2084                                DAG.getValueType(MVT::i32));
2085   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2086 }
2087 
2088 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
2089                                              SmallVectorImpl<SDValue> &Results,
2090                                              SelectionDAG &DAG) const {
2091   SDLoc DL(N);
2092   switch (N->getOpcode()) {
2093   default:
2094     llvm_unreachable("Don't know how to custom type legalize this operation!");
2095   case ISD::STRICT_FP_TO_SINT:
2096   case ISD::STRICT_FP_TO_UINT:
2097   case ISD::FP_TO_SINT:
2098   case ISD::FP_TO_UINT: {
2099     bool IsStrict = N->isStrictFPOpcode();
2100     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2101            "Unexpected custom legalisation");
2102     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
2103     // If the FP type needs to be softened, emit a library call using the 'si'
2104     // version. If we left it to default legalization we'd end up with 'di'. If
2105     // the FP type doesn't need to be softened just let generic type
2106     // legalization promote the result type.
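    // For example (illustrative only): an f64 -> i32 FP_TO_SINT on a target
    // where f64 is softened would be lowered here to a __fixdfsi call, whereas
    // default promotion of the i32 result to i64 would have picked __fixdfdi.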
2107     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
2108         TargetLowering::TypeSoftenFloat)
2109       return;
2110     RTLIB::Libcall LC;
2111     if (N->getOpcode() == ISD::FP_TO_SINT ||
2112         N->getOpcode() == ISD::STRICT_FP_TO_SINT)
2113       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
2114     else
2115       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
2116     MakeLibCallOptions CallOptions;
2117     EVT OpVT = Op0.getValueType();
2118     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
2119     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
2120     SDValue Result;
2121     std::tie(Result, Chain) =
2122         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
2123     Results.push_back(Result);
2124     if (IsStrict)
2125       Results.push_back(Chain);
2126     break;
2127   }
2128   case ISD::READCYCLECOUNTER: {
2129     assert(!Subtarget.is64Bit() &&
2130            "READCYCLECOUNTER only has custom type legalization on riscv32");
2131 
2132     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
2133     SDValue RCW =
2134         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
2135 
2136     Results.push_back(
2137         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
2138     Results.push_back(RCW.getValue(2));
2139     break;
2140   }
2141   case ISD::ADD:
2142   case ISD::SUB:
2143   case ISD::MUL:
2144     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2145            "Unexpected custom legalisation");
2146     if (N->getOperand(1).getOpcode() == ISD::Constant)
2147       return;
2148     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2149     break;
2150   case ISD::SHL:
2151   case ISD::SRA:
2152   case ISD::SRL:
2153     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2154            "Unexpected custom legalisation");
2155     if (N->getOperand(1).getOpcode() == ISD::Constant)
2156       return;
2157     Results.push_back(customLegalizeToWOp(N, DAG));
2158     break;
2159   case ISD::ROTL:
2160   case ISD::ROTR:
2161     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2162            "Unexpected custom legalisation");
2163     Results.push_back(customLegalizeToWOp(N, DAG));
2164     break;
2165   case ISD::SDIV:
2166   case ISD::UDIV:
2167   case ISD::UREM: {
2168     MVT VT = N->getSimpleValueType(0);
2169     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
2170            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
2171            "Unexpected custom legalisation");
2172     if (N->getOperand(0).getOpcode() == ISD::Constant ||
2173         N->getOperand(1).getOpcode() == ISD::Constant)
2174       return;
2175 
2176     // If the input is i32, use ANY_EXTEND since the W instructions don't read
2177     // the upper 32 bits. For other types we need to sign or zero extend
2178     // based on the opcode.
2179     unsigned ExtOpc = ISD::ANY_EXTEND;
2180     if (VT != MVT::i32)
2181       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
2182                                            : ISD::ZERO_EXTEND;
2183 
2184     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
2185     break;
2186   }
2187   case ISD::BITCAST: {
2188     assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2189              Subtarget.hasStdExtF()) ||
2190             (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) &&
2191            "Unexpected custom legalisation");
2192     SDValue Op0 = N->getOperand(0);
2193     if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) {
2194       if (Op0.getValueType() != MVT::f16)
2195         return;
2196       SDValue FPConv =
2197           DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0);
2198       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
2199     } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2200                Subtarget.hasStdExtF()) {
2201       if (Op0.getValueType() != MVT::f32)
2202         return;
2203       SDValue FPConv =
2204           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
2205       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
2206     }
2207     break;
2208   }
2209   case RISCVISD::GREVI:
2210   case RISCVISD::GORCI: {
2211     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2212            "Unexpected custom legalisation");
2213     // This is similar to customLegalizeToWOp, except that we pass the second
2214     // operand (a TargetConstant) straight through: it is already of type
2215     // XLenVT.
2216     SDLoc DL(N);
2217     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
2218     SDValue NewOp0 =
2219         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2220     SDValue NewRes =
2221         DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
2222     // ReplaceNodeResults requires we maintain the same type for the return
2223     // value.
2224     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
2225     break;
2226   }
2227   case ISD::BSWAP:
2228   case ISD::BITREVERSE: {
2229     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2230            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
2231     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2232                                  N->getOperand(0));
2233     unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
2234     SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
2235                                  DAG.getTargetConstant(Imm, DL,
2236                                                        Subtarget.getXLenVT()));
2237     // ReplaceNodeResults requires we maintain the same type for the return
2238     // value.
2239     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
2240     break;
2241   }
2242   case ISD::FSHL:
2243   case ISD::FSHR: {
2244     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2245            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
2246     SDValue NewOp0 =
2247         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2248     SDValue NewOp1 =
2249         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2250     SDValue NewOp2 =
2251         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
2252     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
2253     // Mask the shift amount to 5 bits.
2254     NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
2255                          DAG.getConstant(0x1f, DL, MVT::i64));
2256     unsigned Opc =
2257         N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
2258     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
2259     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
2260     break;
2261   }
2262   case ISD::EXTRACT_VECTOR_ELT: {
2263     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
2264     // type is illegal (currently only vXi64 RV32).
2265     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
2266     // transferred to the destination register. We issue two of these from the
2267     // upper- and lower- halves of the SEW-bit vector element, slid down to the
2268     // first element.
2269     SDLoc DL(N);
2270     SDValue Vec = N->getOperand(0);
2271     SDValue Idx = N->getOperand(1);
2272     EVT VecVT = Vec.getValueType();
2273     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
2274            VecVT.getVectorElementType() == MVT::i64 &&
2275            "Unexpected EXTRACT_VECTOR_ELT legalization");
2276 
2277     SDValue Slidedown = Vec;
2278     // Unless the index is known to be 0, we must slide the vector down to get
2279     // the desired element into index 0.
2280     if (!isNullConstant(Idx))
2281       Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
2282                               DAG.getUNDEF(VecVT), Vec, Idx);
2283 
2284     MVT XLenVT = Subtarget.getXLenVT();
2285     // Extract the lower XLEN bits of the correct vector element.
2286     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx);
2287 
2288     // To extract the upper XLEN bits of the vector element, shift the first
2289     // element right by 32 bits and re-extract the lower XLEN bits.
2290     SDValue ThirtyTwoV =
2291         DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
2292                     DAG.getConstant(32, DL, Subtarget.getXLenVT()));
2293     SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV);
2294 
2295     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx);
2296 
2297     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
2298     break;
2299   }
2300   case ISD::INTRINSIC_WO_CHAIN: {
2301     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2302     switch (IntNo) {
2303     default:
2304       llvm_unreachable(
2305           "Don't know how to custom type legalize this intrinsic!");
2306     case Intrinsic::riscv_vmv_x_s: {
2307       EVT VT = N->getValueType(0);
2308       assert((VT == MVT::i8 || VT == MVT::i16 ||
2309               (Subtarget.is64Bit() && VT == MVT::i32)) &&
2310              "Unexpected custom legalisation!");
2311       SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
2312                                     Subtarget.getXLenVT(), N->getOperand(1));
2313       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
2314       break;
2315     }
2316     }
2317     break;
2318   }
2319   case ISD::VECREDUCE_ADD:
2320   case ISD::VECREDUCE_AND:
2321   case ISD::VECREDUCE_OR:
2322   case ISD::VECREDUCE_XOR:
2323   case ISD::VECREDUCE_SMAX:
2324   case ISD::VECREDUCE_UMAX:
2325   case ISD::VECREDUCE_SMIN:
2326   case ISD::VECREDUCE_UMIN:
2327     // The custom-lowering for these nodes returns a vector whose first element
2328     // is the result of the reduction. Extract its first element and let the
2329     // legalization for EXTRACT_VECTOR_ELT do the rest of the job.
2330     Results.push_back(lowerVECREDUCE(SDValue(N, 0), DAG));
2331     break;
2332   }
2333 }
2334 
2335 // A structure to hold one of the bit-manipulation patterns below. Together, a
2336 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
2337 //   (or (and (shl x, 1), 0xAAAAAAAA),
2338 //       (and (srl x, 1), 0x55555555))
2339 struct RISCVBitmanipPat {
2340   SDValue Op;
2341   unsigned ShAmt;
2342   bool IsSHL;
2343 
2344   bool formsPairWith(const RISCVBitmanipPat &Other) const {
2345     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
2346   }
2347 };
2348 
2349 // Matches any of the following bit-manipulation patterns:
2350 //   (and (shl x, 1), (0x55555555 << 1))
2351 //   (and (srl x, 1), 0x55555555)
2352 //   (shl (and x, 0x55555555), 1)
2353 //   (srl (and x, (0x55555555 << 1)), 1)
2354 // where the shift amount and mask may vary thus:
2355 //   [1]  = 0x55555555 / 0xAAAAAAAA
2356 //   [2]  = 0x33333333 / 0xCCCCCCCC
2357 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
2358 //   [8]  = 0x00FF00FF / 0xFF00FF00
//   [16] = 0x0000FFFF / 0xFFFF0000
2360 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
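// Worked example (illustrative only): (and (srl x, 4), 0x0F0F0F0F) matches as
// {Op=x, ShAmt=4, IsSHL=false} and (and (shl x, 4), 0xF0F0F0F0) matches as
// {Op=x, ShAmt=4, IsSHL=true}; the two form a pair, and the OR of them is
// later combined to (GREVI x, 4).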
2361 static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
2362   Optional<uint64_t> Mask;
2363   // Optionally consume a mask around the shift operation.
2364   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
2365     Mask = Op.getConstantOperandVal(1);
2366     Op = Op.getOperand(0);
2367   }
2368   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
2369     return None;
2370   bool IsSHL = Op.getOpcode() == ISD::SHL;
2371 
2372   if (!isa<ConstantSDNode>(Op.getOperand(1)))
2373     return None;
2374   auto ShAmt = Op.getConstantOperandVal(1);
2375 
2376   if (!isPowerOf2_64(ShAmt))
2377     return None;
2378 
2379   // These are the unshifted masks which we use to match bit-manipulation
2380   // patterns. They may be shifted left in certain circumstances.
2381   static const uint64_t BitmanipMasks[] = {
2382       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
2383       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
2384   };
2385 
2386   unsigned MaskIdx = Log2_64(ShAmt);
2387   if (MaskIdx >= array_lengthof(BitmanipMasks))
2388     return None;
2389 
2390   auto Src = Op.getOperand(0);
2391 
2392   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
2393   auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
2394 
2395   // The expected mask is shifted left when the AND is found around SHL
2396   // patterns.
2397   //   ((x >> 1) & 0x55555555)
2398   //   ((x << 1) & 0xAAAAAAAA)
2399   bool SHLExpMask = IsSHL;
2400 
2401   if (!Mask) {
2402     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
2403     // the mask is all ones: consume that now.
2404     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
2405       Mask = Src.getConstantOperandVal(1);
2406       Src = Src.getOperand(0);
2407       // The expected mask is now in fact shifted left for SRL, so reverse the
2408       // decision.
2409       //   ((x & 0xAAAAAAAA) >> 1)
2410       //   ((x & 0x55555555) << 1)
2411       SHLExpMask = !SHLExpMask;
2412     } else {
2413       // Use a default shifted mask of all-ones if there's no AND, truncated
2414       // down to the expected width. This simplifies the logic later on.
2415       Mask = maskTrailingOnes<uint64_t>(Width);
2416       *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
2417     }
2418   }
2419 
2420   if (SHLExpMask)
2421     ExpMask <<= ShAmt;
2422 
2423   if (Mask != ExpMask)
2424     return None;
2425 
2426   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
2427 }
2428 
2429 // Match the following pattern as a GREVI(W) operation
2430 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
2431 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
2432                                const RISCVSubtarget &Subtarget) {
2433   EVT VT = Op.getValueType();
2434 
2435   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
2436     auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
2437     auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
2438     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
2439       SDLoc DL(Op);
2440       return DAG.getNode(
2441           RISCVISD::GREVI, DL, VT, LHS->Op,
2442           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
2443     }
2444   }
2445   return SDValue();
2446 }
2447 
// Matches any of the following patterns as a GORCI(W) operation
2449 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
2450 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
2451 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
2452 // Note that with the variant of 3.,
2453 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
2454 // the inner pattern will first be matched as GREVI and then the outer
2455 // pattern will be matched to GORC via the first rule above.
2456 // 4.  (or (rotl/rotr x, bitwidth/2), x)
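// For example (illustrative only), with XLEN=32 the node (or (rotl x, 16), x)
// matches rule 4. above and is combined to (GORCI x, 16).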
2457 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
2458                                const RISCVSubtarget &Subtarget) {
2459   EVT VT = Op.getValueType();
2460 
2461   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
2462     SDLoc DL(Op);
2463     SDValue Op0 = Op.getOperand(0);
2464     SDValue Op1 = Op.getOperand(1);
2465 
2466     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
2467       if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
2468           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
2469         return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
2470       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
2471       if ((Reverse.getOpcode() == ISD::ROTL ||
2472            Reverse.getOpcode() == ISD::ROTR) &&
2473           Reverse.getOperand(0) == X &&
2474           isa<ConstantSDNode>(Reverse.getOperand(1))) {
2475         uint64_t RotAmt = Reverse.getConstantOperandVal(1);
2476         if (RotAmt == (VT.getSizeInBits() / 2))
2477           return DAG.getNode(
2478               RISCVISD::GORCI, DL, VT, X,
2479               DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
2480       }
2481       return SDValue();
2482     };
2483 
2484     // Check for either commutable permutation of (or (GREVI x, shamt), x)
2485     if (SDValue V = MatchOROfReverse(Op0, Op1))
2486       return V;
2487     if (SDValue V = MatchOROfReverse(Op1, Op0))
2488       return V;
2489 
2490     // OR is commutable so canonicalize its OR operand to the left
2491     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
2492       std::swap(Op0, Op1);
2493     if (Op0.getOpcode() != ISD::OR)
2494       return SDValue();
2495     SDValue OrOp0 = Op0.getOperand(0);
2496     SDValue OrOp1 = Op0.getOperand(1);
2497     auto LHS = matchRISCVBitmanipPat(OrOp0);
2498     // OR is commutable so swap the operands and try again: x might have been
2499     // on the left
2500     if (!LHS) {
2501       std::swap(OrOp0, OrOp1);
2502       LHS = matchRISCVBitmanipPat(OrOp0);
2503     }
2504     auto RHS = matchRISCVBitmanipPat(Op1);
2505     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
2506       return DAG.getNode(
2507           RISCVISD::GORCI, DL, VT, LHS->Op,
2508           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
2509     }
2510   }
2511   return SDValue();
2512 }
2513 
// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
// non-zero, and to x when C1^C2 is zero: any repeated GREVI stage undoes
// itself.
// Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). A repeated GORCI
// stage does not undo itself, but it is redundant.
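// For example (illustrative only): (GREVI (GREVI x, 24), 8) becomes
// (GREVI x, 16) since 24 ^ 8 == 16, (GREVI (GREVI x, 7), 7) folds to x, and
// (GORCI (GORCI x, 4), 2) becomes (GORCI x, 6) since 4 | 2 == 6.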
2518 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
2519   unsigned ShAmt1 = N->getConstantOperandVal(1);
2520   SDValue Src = N->getOperand(0);
2521 
2522   if (Src.getOpcode() != N->getOpcode())
2523     return SDValue();
2524 
2525   unsigned ShAmt2 = Src.getConstantOperandVal(1);
2526   Src = Src.getOperand(0);
2527 
2528   unsigned CombinedShAmt;
2529   if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW)
2530     CombinedShAmt = ShAmt1 | ShAmt2;
2531   else
2532     CombinedShAmt = ShAmt1 ^ ShAmt2;
2533 
2534   if (CombinedShAmt == 0)
2535     return Src;
2536 
2537   SDLoc DL(N);
2538   return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src,
2539                      DAG.getTargetConstant(CombinedShAmt, DL,
2540                                            N->getOperand(1).getValueType()));
2541 }
2542 
2543 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
2544                                                DAGCombinerInfo &DCI) const {
2545   SelectionDAG &DAG = DCI.DAG;
2546 
2547   switch (N->getOpcode()) {
2548   default:
2549     break;
2550   case RISCVISD::SplitF64: {
2551     SDValue Op0 = N->getOperand(0);
2552     // If the input to SplitF64 is just BuildPairF64 then the operation is
2553     // redundant. Instead, use BuildPairF64's operands directly.
2554     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
2555       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
2556 
2557     SDLoc DL(N);
2558 
2559     // It's cheaper to materialise two 32-bit integers than to load a double
2560     // from the constant pool and transfer it to integer registers through the
2561     // stack.
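    // For example (illustrative only): splitting the f64 constant 1.0
    // (0x3FF0000000000000) yields Lo = 0x00000000 and Hi = 0x3FF00000, each of
    // which can be materialised with at most a lui/addi pair.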
2562     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
2563       APInt V = C->getValueAPF().bitcastToAPInt();
2564       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
2565       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
2566       return DCI.CombineTo(N, Lo, Hi);
2567     }
2568 
2569     // This is a target-specific version of a DAGCombine performed in
2570     // DAGCombiner::visitBITCAST. It performs the equivalent of:
2571     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
2572     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
2573     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
2574         !Op0.getNode()->hasOneUse())
2575       break;
2576     SDValue NewSplitF64 =
2577         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
2578                     Op0.getOperand(0));
2579     SDValue Lo = NewSplitF64.getValue(0);
2580     SDValue Hi = NewSplitF64.getValue(1);
2581     APInt SignBit = APInt::getSignMask(32);
2582     if (Op0.getOpcode() == ISD::FNEG) {
2583       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
2584                                   DAG.getConstant(SignBit, DL, MVT::i32));
2585       return DCI.CombineTo(N, Lo, NewHi);
2586     }
2587     assert(Op0.getOpcode() == ISD::FABS);
2588     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
2589                                 DAG.getConstant(~SignBit, DL, MVT::i32));
2590     return DCI.CombineTo(N, Lo, NewHi);
2591   }
2592   case RISCVISD::SLLW:
2593   case RISCVISD::SRAW:
2594   case RISCVISD::SRLW:
2595   case RISCVISD::ROLW:
2596   case RISCVISD::RORW: {
2597     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
2598     SDValue LHS = N->getOperand(0);
2599     SDValue RHS = N->getOperand(1);
2600     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
2601     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
2602     if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
2603         SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
2604       if (N->getOpcode() != ISD::DELETED_NODE)
2605         DCI.AddToWorklist(N);
2606       return SDValue(N, 0);
2607     }
2608     break;
2609   }
2610   case RISCVISD::FSL:
2611   case RISCVISD::FSR: {
    // Only the lower log2(BitWidth)+1 bits of the shift amount are read.
2613     SDValue ShAmt = N->getOperand(2);
2614     unsigned BitWidth = ShAmt.getValueSizeInBits();
2615     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
2616     APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
2617     if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
2618       if (N->getOpcode() != ISD::DELETED_NODE)
2619         DCI.AddToWorklist(N);
2620       return SDValue(N, 0);
2621     }
2622     break;
2623   }
2624   case RISCVISD::FSLW:
2625   case RISCVISD::FSRW: {
2626     // Only the lower 32 bits of Values and lower 6 bits of shift amount are
2627     // read.
2628     SDValue Op0 = N->getOperand(0);
2629     SDValue Op1 = N->getOperand(1);
2630     SDValue ShAmt = N->getOperand(2);
2631     APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
2632     APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
2633     if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
2634         SimplifyDemandedBits(Op1, OpMask, DCI) ||
2635         SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
2636       if (N->getOpcode() != ISD::DELETED_NODE)
2637         DCI.AddToWorklist(N);
2638       return SDValue(N, 0);
2639     }
2640     break;
2641   }
2642   case RISCVISD::GREVIW:
2643   case RISCVISD::GORCIW: {
    // Only the lower 32 bits of the first operand are read.
2645     SDValue Op0 = N->getOperand(0);
2646     APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
2647     if (SimplifyDemandedBits(Op0, Mask, DCI)) {
2648       if (N->getOpcode() != ISD::DELETED_NODE)
2649         DCI.AddToWorklist(N);
2650       return SDValue(N, 0);
2651     }
2652 
2653     return combineGREVI_GORCI(N, DCI.DAG);
2654   }
2655   case RISCVISD::FMV_X_ANYEXTW_RV64: {
2656     SDLoc DL(N);
2657     SDValue Op0 = N->getOperand(0);
2658     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
2659     // conversion is unnecessary and can be replaced with an ANY_EXTEND
2660     // of the FMV_W_X_RV64 operand.
2661     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
2662       assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
2663              "Unexpected value type!");
2664       return Op0.getOperand(0);
2665     }
2666 
2667     // This is a target-specific version of a DAGCombine performed in
2668     // DAGCombiner::visitBITCAST. It performs the equivalent of:
2669     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
2670     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
2671     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
2672         !Op0.getNode()->hasOneUse())
2673       break;
2674     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
2675                                  Op0.getOperand(0));
2676     APInt SignBit = APInt::getSignMask(32).sext(64);
2677     if (Op0.getOpcode() == ISD::FNEG)
2678       return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
2679                          DAG.getConstant(SignBit, DL, MVT::i64));
2680 
2681     assert(Op0.getOpcode() == ISD::FABS);
2682     return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
2683                        DAG.getConstant(~SignBit, DL, MVT::i64));
2684   }
2685   case RISCVISD::GREVI:
2686   case RISCVISD::GORCI:
2687     return combineGREVI_GORCI(N, DCI.DAG);
2688   case ISD::OR:
2689     if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
2690       return GREV;
2691     if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
2692       return GORC;
2693     break;
2694   case RISCVISD::SELECT_CC: {
2695     // Transform
2696     // (select_cc (xor X, 1), 0, setne, trueV, falseV) ->
2697     // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
2698     // This can occur when legalizing some floating point comparisons.
2699     SDValue LHS = N->getOperand(0);
2700     SDValue RHS = N->getOperand(1);
2701     auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
2702     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
2703     if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) &&
2704         LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) &&
2705         DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) {
2706       SDLoc DL(N);
2707       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
2708       SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
2709       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
2710                          {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3),
2711                           N->getOperand(4)});
2712     }
2713     break;
2714   }
2715   case ISD::SETCC: {
2716     // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1.
2717     // Comparing with 0 may allow us to fold into bnez/beqz.
2718     SDValue LHS = N->getOperand(0);
2719     SDValue RHS = N->getOperand(1);
2720     if (LHS.getValueType().isScalableVector())
2721       break;
2722     auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2723     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
2724     if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) &&
2725         DAG.MaskedValueIsZero(LHS, Mask)) {
2726       SDLoc DL(N);
2727       SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType());
2728       CC = ISD::getSetCCInverse(CC, LHS.getValueType());
2729       return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC);
2730     }
2731     break;
2732   }
2733   }
2734 
2735   return SDValue();
2736 }
2737 
2738 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
2739     const SDNode *N, CombineLevel Level) const {
2740   // The following folds are only desirable if `(OP _, c1 << c2)` can be
2741   // materialised in fewer instructions than `(OP _, c1)`:
2742   //
2743   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2744   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
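  // For example (illustrative values): with c1 = 2047, which fits in a 12-bit
  // add immediate, and c2 = 4, c1 << c2 = 32752 does not fit, so the hook
  // returns false to prevent the combine.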
2745   SDValue N0 = N->getOperand(0);
2746   EVT Ty = N0.getValueType();
2747   if (Ty.isScalarInteger() &&
2748       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
2749     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2750     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
2751     if (C1 && C2) {
2752       const APInt &C1Int = C1->getAPIntValue();
2753       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
2754 
2755       // We can materialise `c1 << c2` into an add immediate, so it's "free",
2756       // and the combine should happen, to potentially allow further combines
2757       // later.
2758       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
2759           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
2760         return true;
2761 
2762       // We can materialise `c1` in an add immediate, so it's "free", and the
2763       // combine should be prevented.
2764       if (C1Int.getMinSignedBits() <= 64 &&
2765           isLegalAddImmediate(C1Int.getSExtValue()))
2766         return false;
2767 
2768       // Neither constant will fit into an immediate, so find materialisation
2769       // costs.
2770       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
2771                                               Subtarget.is64Bit());
2772       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
2773           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
2774 
2775       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
2776       // combine should be prevented.
2777       if (C1Cost < ShiftedC1Cost)
2778         return false;
2779     }
2780   }
2781   return true;
2782 }
2783 
2784 bool RISCVTargetLowering::targetShrinkDemandedConstant(
2785     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2786     TargetLoweringOpt &TLO) const {
2787   // Delay this optimization as late as possible.
2788   if (!TLO.LegalOps)
2789     return false;
2790 
2791   EVT VT = Op.getValueType();
2792   if (VT.isVector())
2793     return false;
2794 
2795   // Only handle AND for now.
2796   if (Op.getOpcode() != ISD::AND)
2797     return false;
2798 
2799   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2800   if (!C)
2801     return false;
2802 
2803   const APInt &Mask = C->getAPIntValue();
2804 
2805   // Clear all non-demanded bits initially.
2806   APInt ShrunkMask = Mask & DemandedBits;
2807 
2808   // If the shrunk mask fits in sign extended 12 bits, let the target
2809   // independent code apply it.
2810   if (ShrunkMask.isSignedIntN(12))
2811     return false;
2812 
2813   // Try to make a smaller immediate by setting undemanded bits.
2814 
2815   // We need to be able to make a negative number through a combination of mask
2816   // and undemanded bits.
2817   APInt ExpandedMask = Mask | ~DemandedBits;
2818   if (!ExpandedMask.isNegative())
2819     return false;
2820 
  // Find the fewest number of bits needed to represent the negative number.
2822   unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
2823 
  // Try to make a 12-bit negative immediate. If that fails, try to make a
  // 32-bit negative immediate unless the shrunk immediate already fits in 32
  // bits.
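  // Illustrative example (assuming a 64-bit XLEN): an AND mask of 0xffffff00
  // with only the low 20 bits demanded shrinks to 0xfff00, which is not a
  // simm12, but can be widened to 0xffffffffffffff00 (-256), which is.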
2826   APInt NewMask = ShrunkMask;
2827   if (MinSignedBits <= 12)
2828     NewMask.setBitsFrom(11);
2829   else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
2830     NewMask.setBitsFrom(31);
2831   else
2832     return false;
2833 
  // Check that the new mask is a subset of the bits we are allowed to set.
2835   assert(NewMask.isSubsetOf(ExpandedMask));
2836 
2837   // If we aren't changing the mask, just return true to keep it and prevent
2838   // the caller from optimizing.
2839   if (NewMask == Mask)
2840     return true;
2841 
2842   // Replace the constant with the new mask.
2843   SDLoc DL(Op);
2844   SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
2845   SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
2846   return TLO.CombineTo(Op, NewOp);
2847 }
2848 
2849 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2850                                                         KnownBits &Known,
2851                                                         const APInt &DemandedElts,
2852                                                         const SelectionDAG &DAG,
2853                                                         unsigned Depth) const {
2854   unsigned BitWidth = Known.getBitWidth();
2855   unsigned Opc = Op.getOpcode();
2856   assert((Opc >= ISD::BUILTIN_OP_END ||
2857           Opc == ISD::INTRINSIC_WO_CHAIN ||
2858           Opc == ISD::INTRINSIC_W_CHAIN ||
2859           Opc == ISD::INTRINSIC_VOID) &&
2860          "Should use MaskedValueIsZero if you don't know whether Op"
2861          " is a target node!");
2862 
2863   Known.resetAll();
2864   switch (Opc) {
2865   default: break;
2866   case RISCVISD::REMUW: {
2867     KnownBits Known2;
2868     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
2869     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
2870     // We only care about the lower 32 bits.
2871     Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
2872     // Restore the original width by sign extending.
2873     Known = Known.sext(BitWidth);
2874     break;
2875   }
2876   case RISCVISD::DIVUW: {
2877     KnownBits Known2;
2878     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
2879     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
2880     // We only care about the lower 32 bits.
2881     Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
2882     // Restore the original width by sign extending.
2883     Known = Known.sext(BitWidth);
2884     break;
2885   }
2886   case RISCVISD::READ_VLENB:
2887     // We assume VLENB is at least 8 bytes.
2888     // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
2889     Known.Zero.setLowBits(3);
2890     break;
2891   }
2892 }
2893 
2894 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
2895     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
2896     unsigned Depth) const {
2897   switch (Op.getOpcode()) {
2898   default:
2899     break;
2900   case RISCVISD::SLLW:
2901   case RISCVISD::SRAW:
2902   case RISCVISD::SRLW:
2903   case RISCVISD::DIVW:
2904   case RISCVISD::DIVUW:
2905   case RISCVISD::REMUW:
2906   case RISCVISD::ROLW:
2907   case RISCVISD::RORW:
2908   case RISCVISD::GREVIW:
2909   case RISCVISD::GORCIW:
2910   case RISCVISD::FSLW:
2911   case RISCVISD::FSRW:
2912     // TODO: As the result is sign-extended, this is conservatively correct. A
2913     // more precise answer could be calculated for SRAW depending on known
2914     // bits in the shift amount.
2915     return 33;
2916   case RISCVISD::VMV_X_S:
2917     // The number of sign bits of the scalar result is computed by obtaining the
2918     // element type of the input vector operand, subtracting its width from the
2919     // XLEN, and then adding one (sign bit within the element type). If the
2920     // element type is wider than XLen, the least-significant XLEN bits are
2921     // taken.
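    // For example, with a 64-bit XLEN and an i8 element type, the result is
    // known to have 64 - 8 + 1 = 57 sign bits.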
2922     if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
2923       return 1;
2924     return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
2925   }
2926 
2927   return 1;
2928 }
2929 
2930 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
2931                                                   MachineBasicBlock *BB) {
2932   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
2933 
2934   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
2935   // Should the count have wrapped while it was being read, we need to try
2936   // again.
2937   // ...
2938   // read:
2939   // rdcycleh x3 # load high word of cycle
2940   // rdcycle  x2 # load low word of cycle
2941   // rdcycleh x4 # load high word of cycle
2942   // bne x3, x4, read # check if high word reads match, otherwise try again
2943   // ...
2944 
2945   MachineFunction &MF = *BB->getParent();
2946   const BasicBlock *LLVM_BB = BB->getBasicBlock();
2947   MachineFunction::iterator It = ++BB->getIterator();
2948 
2949   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
2950   MF.insert(It, LoopMBB);
2951 
2952   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
2953   MF.insert(It, DoneMBB);
2954 
2955   // Transfer the remainder of BB and its successor edges to DoneMBB.
2956   DoneMBB->splice(DoneMBB->begin(), BB,
2957                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
2958   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
2959 
2960   BB->addSuccessor(LoopMBB);
2961 
2962   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2963   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
2964   Register LoReg = MI.getOperand(0).getReg();
2965   Register HiReg = MI.getOperand(1).getReg();
2966   DebugLoc DL = MI.getDebugLoc();
2967 
2968   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
2969   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
2970       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
2971       .addReg(RISCV::X0);
2972   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
2973       .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
2974       .addReg(RISCV::X0);
2975   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
2976       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
2977       .addReg(RISCV::X0);
2978 
2979   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
2980       .addReg(HiReg)
2981       .addReg(ReadAgainReg)
2982       .addMBB(LoopMBB);
2983 
2984   LoopMBB->addSuccessor(LoopMBB);
2985   LoopMBB->addSuccessor(DoneMBB);
2986 
2987   MI.eraseFromParent();
2988 
2989   return DoneMBB;
2990 }
2991 
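// Expand SplitF64Pseudo by spilling the FPR64 source to a stack slot and
// reloading its two 32-bit halves into the GPR destinations (used to move an
// f64 into a pair of GPRs on RV32 with the D extension).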
2992 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
2993                                              MachineBasicBlock *BB) {
2994   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
2995 
2996   MachineFunction &MF = *BB->getParent();
2997   DebugLoc DL = MI.getDebugLoc();
2998   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2999   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
3000   Register LoReg = MI.getOperand(0).getReg();
3001   Register HiReg = MI.getOperand(1).getReg();
3002   Register SrcReg = MI.getOperand(2).getReg();
3003   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
3004   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
3005 
3006   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
3007                           RI);
3008   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
3009   MachineMemOperand *MMOLo =
3010       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
3011   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
3012       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
3013   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
3014       .addFrameIndex(FI)
3015       .addImm(0)
3016       .addMemOperand(MMOLo);
3017   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
3018       .addFrameIndex(FI)
3019       .addImm(4)
3020       .addMemOperand(MMOHi);
3021   MI.eraseFromParent(); // The pseudo instruction is gone now.
3022   return BB;
3023 }
3024 
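// Expand BuildPairF64Pseudo by storing the two 32-bit GPR halves to a stack
// slot and loading the combined value back into the FPR64 destination.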
3025 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
3026                                                  MachineBasicBlock *BB) {
3027   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
3028          "Unexpected instruction");
3029 
3030   MachineFunction &MF = *BB->getParent();
3031   DebugLoc DL = MI.getDebugLoc();
3032   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
3033   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
3034   Register DstReg = MI.getOperand(0).getReg();
3035   Register LoReg = MI.getOperand(1).getReg();
3036   Register HiReg = MI.getOperand(2).getReg();
3037   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
3038   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
3039 
3040   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
3041   MachineMemOperand *MMOLo =
3042       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
3043   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
3044       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
3045   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
3046       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
3047       .addFrameIndex(FI)
3048       .addImm(0)
3049       .addMemOperand(MMOLo);
3050   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
3051       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
3052       .addFrameIndex(FI)
3053       .addImm(4)
3054       .addMemOperand(MMOHi);
3055   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
3056   MI.eraseFromParent(); // The pseudo instruction is gone now.
3057   return BB;
3058 }
3059 
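// Returns true if MI is one of the Select_*_Using_CC_GPR pseudo-instructions
// that emitSelectPseudo expands into branch-and-phi control flow.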
3060 static bool isSelectPseudo(MachineInstr &MI) {
3061   switch (MI.getOpcode()) {
3062   default:
3063     return false;
3064   case RISCV::Select_GPR_Using_CC_GPR:
3065   case RISCV::Select_FPR16_Using_CC_GPR:
3066   case RISCV::Select_FPR32_Using_CC_GPR:
3067   case RISCV::Select_FPR64_Using_CC_GPR:
3068     return true;
3069   }
3070 }
3071 
3072 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
3073                                            MachineBasicBlock *BB) {
3074   // To "insert" Select_* instructions, we actually have to insert the triangle
3075   // control-flow pattern.  The incoming instructions know the destination vreg
3076   // to set, the condition code register to branch on, the true/false values to
3077   // select between, and the condcode to use to select the appropriate branch.
3078   //
3079   // We produce the following control flow:
3080   //     HeadMBB
3081   //     |  \
3082   //     |  IfFalseMBB
3083   //     | /
3084   //    TailMBB
3085   //
3086   // When we find a sequence of selects we attempt to optimize their emission
3087   // by sharing the control flow. Currently we only handle cases where we have
3088   // multiple selects with the exact same condition (same LHS, RHS and CC).
3089   // The selects may be interleaved with other instructions if the other
3090   // instructions meet some requirements we deem safe:
3091   // - They are debug instructions. Otherwise,
3092   // - They do not have side-effects, do not access memory and their inputs do
3093   //   not depend on the results of the select pseudo-instructions.
3094   // The TrueV/FalseV operands of the selects cannot depend on the result of
3095   // previous selects in the sequence.
3096   // These conditions could be further relaxed. See the X86 target for a
3097   // related approach and more information.
3098   Register LHS = MI.getOperand(1).getReg();
3099   Register RHS = MI.getOperand(2).getReg();
3100   auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
3101 
3102   SmallVector<MachineInstr *, 4> SelectDebugValues;
3103   SmallSet<Register, 4> SelectDests;
3104   SelectDests.insert(MI.getOperand(0).getReg());
3105 
3106   MachineInstr *LastSelectPseudo = &MI;
3107 
3108   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
3109        SequenceMBBI != E; ++SequenceMBBI) {
3110     if (SequenceMBBI->isDebugInstr())
3111       continue;
3112     else if (isSelectPseudo(*SequenceMBBI)) {
3113       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
3114           SequenceMBBI->getOperand(2).getReg() != RHS ||
3115           SequenceMBBI->getOperand(3).getImm() != CC ||
3116           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
3117           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
3118         break;
3119       LastSelectPseudo = &*SequenceMBBI;
3120       SequenceMBBI->collectDebugValues(SelectDebugValues);
3121       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
3122     } else {
3123       if (SequenceMBBI->hasUnmodeledSideEffects() ||
3124           SequenceMBBI->mayLoadOrStore())
3125         break;
3126       if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
3127             return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
3128           }))
3129         break;
3130     }
3131   }
3132 
3133   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
3134   const BasicBlock *LLVM_BB = BB->getBasicBlock();
3135   DebugLoc DL = MI.getDebugLoc();
3136   MachineFunction::iterator I = ++BB->getIterator();
3137 
3138   MachineBasicBlock *HeadMBB = BB;
3139   MachineFunction *F = BB->getParent();
3140   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
3141   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
3142 
3143   F->insert(I, IfFalseMBB);
3144   F->insert(I, TailMBB);
3145 
3146   // Transfer debug instructions associated with the selects to TailMBB.
3147   for (MachineInstr *DebugInstr : SelectDebugValues) {
3148     TailMBB->push_back(DebugInstr->removeFromParent());
3149   }
3150 
3151   // Move all instructions after the sequence to TailMBB.
3152   TailMBB->splice(TailMBB->end(), HeadMBB,
3153                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
3154   // Update machine-CFG edges by transferring all successors of the current
3155   // block to the new block which will contain the Phi nodes for the selects.
3156   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
3157   // Set the successors for HeadMBB.
3158   HeadMBB->addSuccessor(IfFalseMBB);
3159   HeadMBB->addSuccessor(TailMBB);
3160 
3161   // Insert appropriate branch.
3162   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
3163 
3164   BuildMI(HeadMBB, DL, TII.get(Opcode))
3165     .addReg(LHS)
3166     .addReg(RHS)
3167     .addMBB(TailMBB);
3168 
3169   // IfFalseMBB just falls through to TailMBB.
3170   IfFalseMBB->addSuccessor(TailMBB);
3171 
3172   // Create PHIs for all of the select pseudo-instructions.
3173   auto SelectMBBI = MI.getIterator();
3174   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
3175   auto InsertionPoint = TailMBB->begin();
3176   while (SelectMBBI != SelectEnd) {
3177     auto Next = std::next(SelectMBBI);
3178     if (isSelectPseudo(*SelectMBBI)) {
3179       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
3180       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
3181               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
3182           .addReg(SelectMBBI->getOperand(4).getReg())
3183           .addMBB(HeadMBB)
3184           .addReg(SelectMBBI->getOperand(5).getReg())
3185           .addMBB(IfFalseMBB);
3186       SelectMBBI->eraseFromParent();
3187     }
3188     SelectMBBI = Next;
3189   }
3190 
3191   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
3192   return TailMBB;
3193 }
3194 
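// Insert a PseudoVSETVLI before an RVV pseudo-instruction to establish the VL
// and VTYPE it requires, derived from its VL/SEW operands and the LMUL
// encoded in its TSFlags. The now-redundant operands are then neutralised.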
3195 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
3196                                     int VLIndex, unsigned SEWIndex,
3197                                     RISCVVLMUL VLMul, bool ForceTailAgnostic) {
3198   MachineFunction &MF = *BB->getParent();
3199   DebugLoc DL = MI.getDebugLoc();
3200   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
3201 
3202   unsigned SEW = MI.getOperand(SEWIndex).getImm();
3203   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
3204   RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));
3205 
3206   MachineRegisterInfo &MRI = MF.getRegInfo();
3207 
3208   // VL and VTYPE are alive here.
3209   MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI));
3210 
3211   if (VLIndex >= 0) {
3212     // Set VL (rs1 != X0).
3213     Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3214     MIB.addReg(DestReg, RegState::Define | RegState::Dead)
3215         .addReg(MI.getOperand(VLIndex).getReg());
3216   } else
    // With no VL operand in the pseudo, do not modify VL (rd = X0, rs1 = X0).
3218     MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead)
3219         .addReg(RISCV::X0, RegState::Kill);
3220 
3221   // Default to tail agnostic unless the destination is tied to a source. In
3222   // that case the user would have some control over the tail values. The tail
  // policy is also ignored on instructions that only update element 0, such
  // as vmv.s.x or reductions, so use agnostic there to match the common case.
3225   // FIXME: This is conservatively correct, but we might want to detect that
3226   // the input is undefined.
3227   bool TailAgnostic = true;
3228   unsigned UseOpIdx;
3229   if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
3230     TailAgnostic = false;
3231     // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
3232     const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
3233     MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
3234     if (UseMI && UseMI->isImplicitDef())
3235       TailAgnostic = true;
3236   }
3237 
3238   // For simplicity we reuse the vtype representation here.
3239   MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
3240                                      /*TailAgnostic*/ TailAgnostic,
3241                                      /*MaskAgnostic*/ false));
3242 
3243   // Remove (now) redundant operands from pseudo
3244   MI.getOperand(SEWIndex).setImm(-1);
3245   if (VLIndex >= 0) {
3246     MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
3247     MI.getOperand(VLIndex).setIsKill(false);
3248   }
3249 
3250   return BB;
3251 }
3252 
3253 MachineBasicBlock *
3254 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3255                                                  MachineBasicBlock *BB) const {
3256   uint64_t TSFlags = MI.getDesc().TSFlags;
3257 
3258   if (TSFlags & RISCVII::HasSEWOpMask) {
3259     unsigned NumOperands = MI.getNumExplicitOperands();
3260     int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
3261     unsigned SEWIndex = NumOperands - 1;
3262     bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask;
3263 
3264     RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
3265                                                RISCVII::VLMulShift);
3266     return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
3267   }
3268 
3269   switch (MI.getOpcode()) {
3270   default:
3271     llvm_unreachable("Unexpected instr type to insert");
3272   case RISCV::ReadCycleWide:
3273     assert(!Subtarget.is64Bit() &&
           "ReadCycleWide is only to be used on riscv32");
3275     return emitReadCycleWidePseudo(MI, BB);
3276   case RISCV::Select_GPR_Using_CC_GPR:
3277   case RISCV::Select_FPR16_Using_CC_GPR:
3278   case RISCV::Select_FPR32_Using_CC_GPR:
3279   case RISCV::Select_FPR64_Using_CC_GPR:
3280     return emitSelectPseudo(MI, BB);
3281   case RISCV::BuildPairF64Pseudo:
3282     return emitBuildPairF64Pseudo(MI, BB);
3283   case RISCV::SplitF64Pseudo:
3284     return emitSplitF64Pseudo(MI, BB);
3285   }
3286 }
3287 
3288 // Calling Convention Implementation.
3289 // The expectations for frontend ABI lowering vary from target to target.
3290 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
3291 // details, but this is a longer term goal. For now, we simply try to keep the
3292 // role of the frontend as simple and well-defined as possible. The rules can
3293 // be summarised as:
3294 // * Never split up large scalar arguments. We handle them here.
3295 // * If a hardfloat calling convention is being used, and the struct may be
3296 // passed in a pair of registers (fp+fp, int+fp), and both registers are
3297 // available, then pass as two separate arguments. If either the GPRs or FPRs
3298 // are exhausted, then pass according to the rule below.
3299 // * If a struct could never be passed in registers or directly in a stack
3300 // slot (as it is larger than 2*XLEN and the floating point rules don't
3301 // apply), then pass it using a pointer with the byval attribute.
3302 // * If a struct is less than 2*XLEN, then coerce to either a two-element
3303 // word-sized array or a 2*XLEN scalar (depending on alignment).
3304 // * The frontend can determine whether a struct is returned by reference or
3305 // not based on its size and fields. If it will be returned by reference, the
3306 // frontend must modify the prototype so a pointer with the sret annotation is
3307 // passed as the first argument. This is not necessary for large scalar
3308 // returns.
3309 // * Struct return values and varargs should be coerced to structs containing
3310 // register-size fields in the same situations they would be for fixed
3311 // arguments.
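// As a concrete illustration of the rules above (assuming the ilp32 ABI on
// RV32): a small struct such as one holding two int32_t fields can be coerced
// to a two-element i32 array and passed directly, whereas a struct of four
// int64_t fields is larger than 2*XLEN and is therefore passed via a pointer
// with the byval attribute.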
3312 
3313 static const MCPhysReg ArgGPRs[] = {
3314   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
3315   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
3316 };
3317 static const MCPhysReg ArgFPR16s[] = {
3318   RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
3319   RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
3320 };
3321 static const MCPhysReg ArgFPR32s[] = {
3322   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
3323   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
3324 };
3325 static const MCPhysReg ArgFPR64s[] = {
3326   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
3327   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
3328 };
3329 // This is an interim calling convention and it may be changed in the future.
3330 static const MCPhysReg ArgVRs[] = {
3331     RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
3332     RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
3333     RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
3334 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
3335                                      RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
3336                                      RISCV::V20M2, RISCV::V22M2};
3337 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
3338                                      RISCV::V20M4};
3339 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
3340 
3341 // Pass a 2*XLEN argument that has been split into two XLEN values through
3342 // registers or the stack as necessary.
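// For example, an i64 argument on RV32 is legalised into two i32 halves; the
// first half may be assigned a GPR while the second ends up on the stack if
// the argument registers are exhausted.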
3343 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
3344                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
3345                                 MVT ValVT2, MVT LocVT2,
3346                                 ISD::ArgFlagsTy ArgFlags2) {
3347   unsigned XLenInBytes = XLen / 8;
3348   if (Register Reg = State.AllocateReg(ArgGPRs)) {
3349     // At least one half can be passed via register.
3350     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
3351                                      VA1.getLocVT(), CCValAssign::Full));
3352   } else {
3353     // Both halves must be passed on the stack, with proper alignment.
3354     Align StackAlign =
3355         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
3356     State.addLoc(
3357         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
3358                             State.AllocateStack(XLenInBytes, StackAlign),
3359                             VA1.getLocVT(), CCValAssign::Full));
3360     State.addLoc(CCValAssign::getMem(
3361         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
3362         LocVT2, CCValAssign::Full));
3363     return false;
3364   }
3365 
3366   if (Register Reg = State.AllocateReg(ArgGPRs)) {
3367     // The second half can also be passed via register.
3368     State.addLoc(
3369         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
3370   } else {
3371     // The second half is passed via the stack, without additional alignment.
3372     State.addLoc(CCValAssign::getMem(
3373         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
3374         LocVT2, CCValAssign::Full));
3375   }
3376 
3377   return false;
3378 }
3379 
3380 // Implements the RISC-V calling convention. Returns true upon failure.
3381 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
3382                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
3383                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
3384                      bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
3385                      Optional<unsigned> FirstMaskArgument) {
3386   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
3387   assert(XLen == 32 || XLen == 64);
3388   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
3389 
  // Any return value split into more than two values can't be returned
  // directly.
3392   if (IsRet && ValNo > 1)
3393     return true;
3394 
  // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if
  // passing a variadic argument, or if no F16/F32 argument registers are
  // available.
3397   bool UseGPRForF16_F32 = true;
  // UseGPRForF64 is true if targeting soft-float ABIs or an FLEN=32 ABI, if
  // passing a variadic argument, or if no F64 argument registers are
  // available.
3400   bool UseGPRForF64 = true;
3401 
3402   switch (ABI) {
3403   default:
3404     llvm_unreachable("Unexpected ABI");
3405   case RISCVABI::ABI_ILP32:
3406   case RISCVABI::ABI_LP64:
3407     break;
3408   case RISCVABI::ABI_ILP32F:
3409   case RISCVABI::ABI_LP64F:
3410     UseGPRForF16_F32 = !IsFixed;
3411     break;
3412   case RISCVABI::ABI_ILP32D:
3413   case RISCVABI::ABI_LP64D:
3414     UseGPRForF16_F32 = !IsFixed;
3415     UseGPRForF64 = !IsFixed;
3416     break;
3417   }
3418 
3419   // FPR16, FPR32, and FPR64 alias each other.
3420   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
3421     UseGPRForF16_F32 = true;
3422     UseGPRForF64 = true;
3423   }
3424 
3425   // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
3426   // similar local variables rather than directly checking against the target
3427   // ABI.
3428 
3429   if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
3430     LocVT = XLenVT;
3431     LocInfo = CCValAssign::BCvt;
3432   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
3433     LocVT = MVT::i64;
3434     LocInfo = CCValAssign::BCvt;
3435   }
3436 
3437   // If this is a variadic argument, the RISC-V calling convention requires
3438   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
3439   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
3440   // be used regardless of whether the original argument was split during
3441   // legalisation or not. The argument will not be passed by registers if the
3442   // original type is larger than 2*XLEN, so the register alignment rule does
3443   // not apply.
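  // For example (RV32, ilp32): a variadic double passed after a single fixed
  // argument in a0 has 8-byte alignment and 8-byte size, so a1 is skipped and
  // the double is passed in a2/a3.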
3444   unsigned TwoXLenInBytes = (2 * XLen) / 8;
3445   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
3446       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
3447     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
3448     // Skip 'odd' register if necessary.
3449     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
3450       State.AllocateReg(ArgGPRs);
3451   }
3452 
3453   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
3454   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
3455       State.getPendingArgFlags();
3456 
3457   assert(PendingLocs.size() == PendingArgFlags.size() &&
3458          "PendingLocs and PendingArgFlags out of sync");
3459 
3460   // Handle passing f64 on RV32D with a soft float ABI or when floating point
3461   // registers are exhausted.
3462   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
3463     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
3464            "Can't lower f64 if it is split");
    // Depending on available argument GPRs, f64 may be passed in a pair of
3466     // GPRs, split between a GPR and the stack, or passed completely on the
3467     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
3468     // cases.
3469     Register Reg = State.AllocateReg(ArgGPRs);
3470     LocVT = MVT::i32;
3471     if (!Reg) {
3472       unsigned StackOffset = State.AllocateStack(8, Align(8));
3473       State.addLoc(
3474           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3475       return false;
3476     }
3477     if (!State.AllocateReg(ArgGPRs))
3478       State.AllocateStack(4, Align(4));
3479     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3480     return false;
3481   }
3482 
3483   // Split arguments might be passed indirectly, so keep track of the pending
3484   // values.
3485   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
3486     LocVT = XLenVT;
3487     LocInfo = CCValAssign::Indirect;
3488     PendingLocs.push_back(
3489         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
3490     PendingArgFlags.push_back(ArgFlags);
3491     if (!ArgFlags.isSplitEnd()) {
3492       return false;
3493     }
3494   }
3495 
3496   // If the split argument only had two elements, it should be passed directly
3497   // in registers or on the stack.
3498   if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
3499     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3500     // Apply the normal calling convention rules to the first half of the
3501     // split argument.
3502     CCValAssign VA = PendingLocs[0];
3503     ISD::ArgFlagsTy AF = PendingArgFlags[0];
3504     PendingLocs.clear();
3505     PendingArgFlags.clear();
3506     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
3507                                ArgFlags);
3508   }
3509 
3510   // Allocate to a register if possible, or else a stack slot.
3511   Register Reg;
3512   if (ValVT == MVT::f16 && !UseGPRForF16_F32)
3513     Reg = State.AllocateReg(ArgFPR16s);
3514   else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
3515     Reg = State.AllocateReg(ArgFPR32s);
3516   else if (ValVT == MVT::f64 && !UseGPRForF64)
3517     Reg = State.AllocateReg(ArgFPR64s);
3518   else if (ValVT.isScalableVector()) {
3519     const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
3520     if (RC == &RISCV::VRRegClass) {
3521       // Assign the first mask argument to V0.
3522       // This is an interim calling convention and it may be changed in the
3523       // future.
3524       if (FirstMaskArgument.hasValue() &&
3525           ValNo == FirstMaskArgument.getValue()) {
3526         Reg = State.AllocateReg(RISCV::V0);
3527       } else {
3528         Reg = State.AllocateReg(ArgVRs);
3529       }
3530     } else if (RC == &RISCV::VRM2RegClass) {
3531       Reg = State.AllocateReg(ArgVRM2s);
3532     } else if (RC == &RISCV::VRM4RegClass) {
3533       Reg = State.AllocateReg(ArgVRM4s);
3534     } else if (RC == &RISCV::VRM8RegClass) {
3535       Reg = State.AllocateReg(ArgVRM8s);
3536     } else {
3537       llvm_unreachable("Unhandled class register for ValueType");
3538     }
3539     if (!Reg) {
3540       LocInfo = CCValAssign::Indirect;
3541       // Try using a GPR to pass the address
3542       Reg = State.AllocateReg(ArgGPRs);
3543       LocVT = XLenVT;
3544     }
3545   } else
3546     Reg = State.AllocateReg(ArgGPRs);
3547   unsigned StackOffset =
3548       Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));
3549 
3550   // If we reach this point and PendingLocs is non-empty, we must be at the
3551   // end of a split argument that must be passed indirectly.
3552   if (!PendingLocs.empty()) {
3553     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3554     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3555 
3556     for (auto &It : PendingLocs) {
3557       if (Reg)
3558         It.convertToReg(Reg);
3559       else
3560         It.convertToMem(StackOffset);
3561       State.addLoc(It);
3562     }
3563     PendingLocs.clear();
3564     PendingArgFlags.clear();
3565     return false;
3566   }
3567 
3568   assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
3569           (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) &&
         "Expected an XLenVT or a scalable vector type at this stage");
3571 
3572   if (Reg) {
3573     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3574     return false;
3575   }
3576 
3577   // When a floating-point value is passed on the stack, no bit-conversion is
3578   // needed.
3579   if (ValVT.isFloatingPoint()) {
3580     LocVT = ValVT;
3581     LocInfo = CCValAssign::Full;
3582   }
3583   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3584   return false;
3585 }
3586 
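// Return the index of the first scalable vector mask argument (an nxvXi1
// value), if any. The calling convention pre-assigns such an argument to V0.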
3587 template <typename ArgTy>
3588 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
3589   for (const auto &ArgIdx : enumerate(Args)) {
3590     MVT ArgVT = ArgIdx.value().VT;
3591     if (ArgVT.isScalableVector() &&
3592         ArgVT.getVectorElementType().SimpleTy == MVT::i1)
3593       return ArgIdx.index();
3594   }
3595   return None;
3596 }
3597 
3598 void RISCVTargetLowering::analyzeInputArgs(
3599     MachineFunction &MF, CCState &CCInfo,
3600     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
3601   unsigned NumArgs = Ins.size();
3602   FunctionType *FType = MF.getFunction().getFunctionType();
3603 
3604   Optional<unsigned> FirstMaskArgument;
3605   if (Subtarget.hasStdExtV())
3606     FirstMaskArgument = preAssignMask(Ins);
3607 
3608   for (unsigned i = 0; i != NumArgs; ++i) {
3609     MVT ArgVT = Ins[i].VT;
3610     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
3611 
3612     Type *ArgTy = nullptr;
3613     if (IsRet)
3614       ArgTy = FType->getReturnType();
3615     else if (Ins[i].isOrigArg())
3616       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
3617 
3618     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
3619     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
3620                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
3621                  FirstMaskArgument)) {
3622       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
3623                         << EVT(ArgVT).getEVTString() << '\n');
3624       llvm_unreachable(nullptr);
3625     }
3626   }
3627 }
3628 
3629 void RISCVTargetLowering::analyzeOutputArgs(
3630     MachineFunction &MF, CCState &CCInfo,
3631     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3632     CallLoweringInfo *CLI) const {
3633   unsigned NumArgs = Outs.size();
3634 
3635   Optional<unsigned> FirstMaskArgument;
3636   if (Subtarget.hasStdExtV())
3637     FirstMaskArgument = preAssignMask(Outs);
3638 
3639   for (unsigned i = 0; i != NumArgs; i++) {
3640     MVT ArgVT = Outs[i].VT;
3641     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3642     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3643 
3644     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
3645     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
3646                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
3647                  FirstMaskArgument)) {
3648       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
3649                         << EVT(ArgVT).getEVTString() << "\n");
3650       llvm_unreachable(nullptr);
3651     }
3652   }
3653 }
3654 
3655 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
3656 // values.
3657 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3658                                    const CCValAssign &VA, const SDLoc &DL) {
3659   switch (VA.getLocInfo()) {
3660   default:
3661     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3662   case CCValAssign::Full:
3663     break;
3664   case CCValAssign::BCvt:
3665     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
3666       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
3667     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3668       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
3669     else
3670       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3671     break;
3672   }
3673   return Val;
3674 }
3675 
3676 // The caller is responsible for loading the full value if the argument is
3677 // passed with CCValAssign::Indirect.
3678 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3679                                 const CCValAssign &VA, const SDLoc &DL,
3680                                 const RISCVTargetLowering &TLI) {
3681   MachineFunction &MF = DAG.getMachineFunction();
3682   MachineRegisterInfo &RegInfo = MF.getRegInfo();
3683   EVT LocVT = VA.getLocVT();
3684   SDValue Val;
3685   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
3686   Register VReg = RegInfo.createVirtualRegister(RC);
3687   RegInfo.addLiveIn(VA.getLocReg(), VReg);
3688   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
3689 
3690   if (VA.getLocInfo() == CCValAssign::Indirect)
3691     return Val;
3692 
3693   return convertLocVTToValVT(DAG, Val, VA, DL);
3694 }
3695 
3696 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
3697                                    const CCValAssign &VA, const SDLoc &DL) {
3698   EVT LocVT = VA.getLocVT();
3699 
3700   switch (VA.getLocInfo()) {
3701   default:
3702     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3703   case CCValAssign::Full:
3704     break;
3705   case CCValAssign::BCvt:
3706     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
3707       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
3708     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3709       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
3710     else
3711       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
3712     break;
3713   }
3714   return Val;
3715 }
3716 
3717 // The caller is responsible for loading the full value if the argument is
3718 // passed with CCValAssign::Indirect.
3719 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3720                                 const CCValAssign &VA, const SDLoc &DL) {
3721   MachineFunction &MF = DAG.getMachineFunction();
3722   MachineFrameInfo &MFI = MF.getFrameInfo();
3723   EVT LocVT = VA.getLocVT();
3724   EVT ValVT = VA.getValVT();
3725   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
3726   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3727                                  VA.getLocMemOffset(), /*Immutable=*/true);
3728   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3729   SDValue Val;
3730 
3731   ISD::LoadExtType ExtType;
3732   switch (VA.getLocInfo()) {
3733   default:
3734     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3735   case CCValAssign::Full:
3736   case CCValAssign::Indirect:
3737   case CCValAssign::BCvt:
3738     ExtType = ISD::NON_EXTLOAD;
3739     break;
3740   }
3741   Val = DAG.getExtLoad(
3742       ExtType, DL, LocVT, Chain, FIN,
3743       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3744   return Val;
3745 }
3746 
3747 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
3748                                        const CCValAssign &VA, const SDLoc &DL) {
3749   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
3750          "Unexpected VA");
3751   MachineFunction &MF = DAG.getMachineFunction();
3752   MachineFrameInfo &MFI = MF.getFrameInfo();
3753   MachineRegisterInfo &RegInfo = MF.getRegInfo();
3754 
3755   if (VA.isMemLoc()) {
3756     // f64 is passed on the stack.
3757     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
3758     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3759     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
3760                        MachinePointerInfo::getFixedStack(MF, FI));
3761   }
3762 
3763   assert(VA.isRegLoc() && "Expected register VA assignment");
3764 
3765   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3766   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
3767   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
3768   SDValue Hi;
3769   if (VA.getLocReg() == RISCV::X17) {
3770     // Second half of f64 is passed on the stack.
3771     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
3772     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3773     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
3774                      MachinePointerInfo::getFixedStack(MF, FI));
3775   } else {
3776     // Second half of f64 is passed in another GPR.
3777     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3778     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
3779     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
3780   }
3781   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
3782 }
3783 
// FastCC gives less than a 1% performance improvement on some particular
// benchmarks, but it may theoretically benefit other cases.
3786 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
3787                             CCValAssign::LocInfo LocInfo,
3788                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
3789 
3790   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3791     // X5 and X6 might be used for save-restore libcall.
3792     static const MCPhysReg GPRList[] = {
3793         RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
3794         RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
3795         RISCV::X29, RISCV::X30, RISCV::X31};
3796     if (unsigned Reg = State.AllocateReg(GPRList)) {
3797       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3798       return false;
3799     }
3800   }
3801 
3802   if (LocVT == MVT::f16) {
3803     static const MCPhysReg FPR16List[] = {
3804         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
3805         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
3806         RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
3807         RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
3808     if (unsigned Reg = State.AllocateReg(FPR16List)) {
3809       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3810       return false;
3811     }
3812   }
3813 
3814   if (LocVT == MVT::f32) {
3815     static const MCPhysReg FPR32List[] = {
3816         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
3817         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
3818         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
3819         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
3820     if (unsigned Reg = State.AllocateReg(FPR32List)) {
3821       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3822       return false;
3823     }
3824   }
3825 
3826   if (LocVT == MVT::f64) {
3827     static const MCPhysReg FPR64List[] = {
3828         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
3829         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
3830         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
3831         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
3832     if (unsigned Reg = State.AllocateReg(FPR64List)) {
3833       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3834       return false;
3835     }
3836   }
3837 
3838   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
3839     unsigned Offset4 = State.AllocateStack(4, Align(4));
3840     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
3841     return false;
3842   }
3843 
3844   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
3845     unsigned Offset5 = State.AllocateStack(8, Align(8));
3846     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
3847     return false;
3848   }
3849 
3850   return true; // CC didn't match.
3851 }
3852 
3853 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
3854                          CCValAssign::LocInfo LocInfo,
3855                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
3856 
3857   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3858     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
3859     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
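    // (That is, x9 and x18-x27, the callee-saved registers.)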
3860     static const MCPhysReg GPRList[] = {
3861         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
3862         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
3863     if (unsigned Reg = State.AllocateReg(GPRList)) {
3864       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3865       return false;
3866     }
3867   }
3868 
3869   if (LocVT == MVT::f32) {
3870     // Pass in STG registers: F1, ..., F6
3871     //                        fs0 ... fs5
3872     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
3873                                           RISCV::F18_F, RISCV::F19_F,
3874                                           RISCV::F20_F, RISCV::F21_F};
3875     if (unsigned Reg = State.AllocateReg(FPR32List)) {
3876       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3877       return false;
3878     }
3879   }
3880 
3881   if (LocVT == MVT::f64) {
3882     // Pass in STG registers: D1, ..., D6
3883     //                        fs6 ... fs11
3884     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
3885                                           RISCV::F24_D, RISCV::F25_D,
3886                                           RISCV::F26_D, RISCV::F27_D};
3887     if (unsigned Reg = State.AllocateReg(FPR64List)) {
3888       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3889       return false;
3890     }
3891   }
3892 
3893   report_fatal_error("No registers left in GHC calling convention");
3894   return true;
3895 }
3896 
3897 // Transform physical registers into virtual registers.
3898 SDValue RISCVTargetLowering::LowerFormalArguments(
3899     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3900     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3901     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3902 
3903   MachineFunction &MF = DAG.getMachineFunction();
3904 
3905   switch (CallConv) {
3906   default:
3907     report_fatal_error("Unsupported calling convention");
3908   case CallingConv::C:
3909   case CallingConv::Fast:
3910     break;
3911   case CallingConv::GHC:
3912     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
3913         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
3914       report_fatal_error(
3915         "GHC calling convention requires the F and D instruction set extensions");
3916   }
3917 
3918   const Function &Func = MF.getFunction();
3919   if (Func.hasFnAttribute("interrupt")) {
3920     if (!Func.arg_empty())
3921       report_fatal_error(
3922         "Functions with the interrupt attribute cannot have arguments!");
3923 
3924     StringRef Kind =
3925       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
3926 
3927     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
3928       report_fatal_error(
3929         "Function interrupt attribute argument not supported!");
3930   }
3931 
3932   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3933   MVT XLenVT = Subtarget.getXLenVT();
3934   unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
3936   std::vector<SDValue> OutChains;
3937 
3938   // Assign locations to all of the incoming arguments.
3939   SmallVector<CCValAssign, 16> ArgLocs;
3940   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3941 
3942   if (CallConv == CallingConv::Fast)
3943     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
3944   else if (CallConv == CallingConv::GHC)
3945     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
3946   else
3947     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
3948 
3949   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3950     CCValAssign &VA = ArgLocs[i];
3951     SDValue ArgValue;
3952     // Passing f64 on RV32D with a soft float ABI must be handled as a special
3953     // case.
3954     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
3955       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
3956     else if (VA.isRegLoc())
3957       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3958     else
3959       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3960 
3961     if (VA.getLocInfo() == CCValAssign::Indirect) {
3962       // If the original argument was split and passed by reference (e.g. i128
3963       // on RV32), we need to load all parts of it here (using the same
3964       // address).
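      // For example (illustrative), an i128 on RV32 is lowered to four i32
      // parts sharing one OrigArgIndex: the load above fetches the part at
      // offset 0, and the loop below loads the remaining parts at their
      // PartOffsets from the same base address.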
3965       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3966                                    MachinePointerInfo()));
3967       unsigned ArgIndex = Ins[i].OrigArgIndex;
3968       assert(Ins[i].PartOffset == 0);
3969       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3970         CCValAssign &PartVA = ArgLocs[i + 1];
3971         unsigned PartOffset = Ins[i + 1].PartOffset;
3972         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
3973                                       DAG.getIntPtrConstant(PartOffset, DL));
3974         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3975                                      MachinePointerInfo()));
3976         ++i;
3977       }
3978       continue;
3979     }
3980     InVals.push_back(ArgValue);
3981   }
3982 
3983   if (IsVarArg) {
3984     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
3985     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3986     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
3987     MachineFrameInfo &MFI = MF.getFrameInfo();
3988     MachineRegisterInfo &RegInfo = MF.getRegInfo();
3989     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
3990 
3991     // Offset of the first variable argument from stack pointer, and size of
3992     // the vararg save area. For now, the varargs save area is either zero or
3993     // large enough to hold a0-a7.
3994     int VaArgOffset, VarArgsSaveSize;
3995 
3996     // If all registers are allocated, then all varargs must be passed on the
3997     // stack and we don't need to save any argregs.
3998     if (ArgRegs.size() == Idx) {
3999       VaArgOffset = CCInfo.getNextStackOffset();
4000       VarArgsSaveSize = 0;
4001     } else {
4002       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
4003       VaArgOffset = -VarArgsSaveSize;
4004     }
4005 
    // Record the frame index of the first variable argument,
    // which is needed by VASTART.
4008     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
4009     RVFI->setVarArgsFrameIndex(FI);
4010 
    // If saving an odd number of registers, create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
4014     if (Idx % 2) {
4015       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
4016       VarArgsSaveSize += XLenInBytes;
4017     }
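    // Illustrative example: on RV32 a vararg function with one fixed XLEN
    // argument leaves a1-a7 to be saved (Idx == 1), giving VarArgsSaveSize
    // == 28 and VaArgOffset == -28; the odd Idx then adds a 4-byte slot at
    // -32 so the save area stays 2*XLEN-aligned.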
4018 
4019     // Copy the integer registers that may have been used for passing varargs
4020     // to the vararg save area.
4021     for (unsigned I = Idx; I < ArgRegs.size();
4022          ++I, VaArgOffset += XLenInBytes) {
4023       const Register Reg = RegInfo.createVirtualRegister(RC);
4024       RegInfo.addLiveIn(ArgRegs[I], Reg);
4025       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
4026       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
4027       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4028       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
4029                                    MachinePointerInfo::getFixedStack(MF, FI));
4030       cast<StoreSDNode>(Store.getNode())
4031           ->getMemOperand()
4032           ->setValue((Value *)nullptr);
4033       OutChains.push_back(Store);
4034     }
4035     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
4036   }
4037 
4038   // All stores are grouped in one node to allow the matching between
4039   // the size of Ins and InVals. This only happens for vararg functions.
4040   if (!OutChains.empty()) {
4041     OutChains.push_back(Chain);
4042     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
4043   }
4044 
4045   return Chain;
4046 }
4047 
4048 /// isEligibleForTailCallOptimization - Check whether the call is eligible
4049 /// for tail call optimization.
4050 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
4051 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
4052     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
4053     const SmallVector<CCValAssign, 16> &ArgLocs) const {
4054 
4055   auto &Callee = CLI.Callee;
4056   auto CalleeCC = CLI.CallConv;
4057   auto &Outs = CLI.Outs;
4058   auto &Caller = MF.getFunction();
4059   auto CallerCC = Caller.getCallingConv();
4060 
4061   // Exception-handling functions need a special set of instructions to
4062   // indicate a return to the hardware. Tail-calling another function would
4063   // probably break this.
4064   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
4065   // should be expanded as new function attributes are introduced.
4066   if (Caller.hasFnAttribute("interrupt"))
4067     return false;
4068 
4069   // Do not tail call opt if the stack is used to pass parameters.
4070   if (CCInfo.getNextStackOffset() != 0)
4071     return false;
4072 
  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly: the address of the value is passed in a register, or
  // put on the stack if no register is available. Passing indirectly usually
  // also requires allocating stack space to hold the value itself, so the
  // CCInfo.getNextStackOffset() != 0 check above is not sufficient; we must
  // also check whether any entry in ArgLocs is assigned
  // CCValAssign::Indirect.
4081   for (auto &VA : ArgLocs)
4082     if (VA.getLocInfo() == CCValAssign::Indirect)
4083       return false;
4084 
4085   // Do not tail call opt if either caller or callee uses struct return
4086   // semantics.
4087   auto IsCallerStructRet = Caller.hasStructRetAttr();
4088   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
4089   if (IsCallerStructRet || IsCalleeStructRet)
4090     return false;
4091 
4092   // Externally-defined functions with weak linkage should not be
4093   // tail-called. The behaviour of branch instructions in this situation (as
4094   // used for tail calls) is implementation-defined, so we cannot rely on the
4095   // linker replacing the tail call with a return.
4096   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4097     const GlobalValue *GV = G->getGlobal();
4098     if (GV->hasExternalWeakLinkage())
4099       return false;
4100   }
4101 
4102   // The callee has to preserve all registers the caller needs to preserve.
4103   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
4104   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4105   if (CalleeCC != CallerCC) {
4106     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4107     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4108       return false;
4109   }
4110 
4111   // Byval parameters hand the function a pointer directly into the stack area
4112   // we want to reuse during a tail call. Working around this *is* possible
4113   // but less efficient and uglier in LowerCall.
4114   for (auto &Arg : Outs)
4115     if (Arg.Flags.isByVal())
4116       return false;
4117 
4118   return true;
4119 }
4120 
4121 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
4122 // and output parameter nodes.
4123 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
4124                                        SmallVectorImpl<SDValue> &InVals) const {
4125   SelectionDAG &DAG = CLI.DAG;
4126   SDLoc &DL = CLI.DL;
4127   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4128   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4129   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4130   SDValue Chain = CLI.Chain;
4131   SDValue Callee = CLI.Callee;
4132   bool &IsTailCall = CLI.IsTailCall;
4133   CallingConv::ID CallConv = CLI.CallConv;
4134   bool IsVarArg = CLI.IsVarArg;
4135   EVT PtrVT = getPointerTy(DAG.getDataLayout());
4136   MVT XLenVT = Subtarget.getXLenVT();
4137 
4138   MachineFunction &MF = DAG.getMachineFunction();
4139 
4140   // Analyze the operands of the call, assigning locations to each operand.
4141   SmallVector<CCValAssign, 16> ArgLocs;
4142   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4143 
4144   if (CallConv == CallingConv::Fast)
4145     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
4146   else if (CallConv == CallingConv::GHC)
4147     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
4148   else
4149     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
4150 
4151   // Check if it's really possible to do a tail call.
4152   if (IsTailCall)
4153     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
4154 
4155   if (IsTailCall)
4156     ++NumTailCalls;
4157   else if (CLI.CB && CLI.CB->isMustTailCall())
4158     report_fatal_error("failed to perform tail call elimination on a call "
4159                        "site marked musttail");
4160 
4161   // Get a count of how many bytes are to be pushed on the stack.
4162   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
4163 
4164   // Create local copies for byval args
4165   SmallVector<SDValue, 8> ByValArgs;
4166   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4167     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4168     if (!Flags.isByVal())
4169       continue;
4170 
4171     SDValue Arg = OutVals[i];
4172     unsigned Size = Flags.getByValSize();
4173     Align Alignment = Flags.getNonZeroByValAlign();
4174 
4175     int FI =
4176         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
4177     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4178     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
4179 
4180     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
4181                           /*IsVolatile=*/false,
4182                           /*AlwaysInline=*/false, IsTailCall,
4183                           MachinePointerInfo(), MachinePointerInfo());
4184     ByValArgs.push_back(FIPtr);
4185   }
4186 
4187   if (!IsTailCall)
4188     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
4189 
4190   // Copy argument values to their designated locations.
4191   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4192   SmallVector<SDValue, 8> MemOpChains;
4193   SDValue StackPtr;
4194   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
4195     CCValAssign &VA = ArgLocs[i];
4196     SDValue ArgValue = OutVals[i];
4197     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4198 
4199     // Handle passing f64 on RV32D with a soft float ABI as a special case.
4200     bool IsF64OnRV32DSoftABI =
4201         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
4202     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
4203       SDValue SplitF64 = DAG.getNode(
4204           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
4205       SDValue Lo = SplitF64.getValue(0);
4206       SDValue Hi = SplitF64.getValue(1);
4207 
4208       Register RegLo = VA.getLocReg();
4209       RegsToPass.push_back(std::make_pair(RegLo, Lo));
4210 
4211       if (RegLo == RISCV::X17) {
4212         // Second half of f64 is passed on the stack.
4213         // Work out the address of the stack slot.
4214         if (!StackPtr.getNode())
4215           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
4216         // Emit the store.
4217         MemOpChains.push_back(
4218             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
4219       } else {
4220         // Second half of f64 is passed in another GPR.
4221         assert(RegLo < RISCV::X31 && "Invalid register pair");
4222         Register RegHigh = RegLo + 1;
4223         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
4224       }
4225       continue;
4226     }
4227 
4228     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
4229     // as any other MemLoc.
4230 
4231     // Promote the value if needed.
4232     // For now, only handle fully promoted and indirect arguments.
4233     if (VA.getLocInfo() == CCValAssign::Indirect) {
4234       // Store the argument in a stack slot and pass its address.
4235       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
4236       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4237       MemOpChains.push_back(
4238           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
4239                        MachinePointerInfo::getFixedStack(MF, FI)));
4240       // If the original argument was split (e.g. i128), we need
4241       // to store all parts of it here (and pass just one address).
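      // (This mirrors the load loop in LowerFormalArguments: e.g. for an i128
      // on RV32 the remaining i32 parts are stored at offsets 4, 8 and 12
      // from the spill slot, and only the slot's address is passed.)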
4242       unsigned ArgIndex = Outs[i].OrigArgIndex;
4243       assert(Outs[i].PartOffset == 0);
4244       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
4245         SDValue PartValue = OutVals[i + 1];
4246         unsigned PartOffset = Outs[i + 1].PartOffset;
4247         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
4248                                       DAG.getIntPtrConstant(PartOffset, DL));
4249         MemOpChains.push_back(
4250             DAG.getStore(Chain, DL, PartValue, Address,
4251                          MachinePointerInfo::getFixedStack(MF, FI)));
4252         ++i;
4253       }
4254       ArgValue = SpillSlot;
4255     } else {
4256       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
4257     }
4258 
4259     // Use local copy if it is a byval arg.
4260     if (Flags.isByVal())
4261       ArgValue = ByValArgs[j++];
4262 
4263     if (VA.isRegLoc()) {
4264       // Queue up the argument copies and emit them at the end.
4265       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
4266     } else {
4267       assert(VA.isMemLoc() && "Argument not register or memory");
4268       assert(!IsTailCall && "Tail call not allowed if stack is used "
4269                             "for passing parameters");
4270 
4271       // Work out the address of the stack slot.
4272       if (!StackPtr.getNode())
4273         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
4274       SDValue Address =
4275           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
4276                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
4277 
4278       // Emit the store.
4279       MemOpChains.push_back(
4280           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
4281     }
4282   }
4283 
4284   // Join the stores, which are independent of one another.
4285   if (!MemOpChains.empty())
4286     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4287 
4288   SDValue Glue;
4289 
4290   // Build a sequence of copy-to-reg nodes, chained and glued together.
4291   for (auto &Reg : RegsToPass) {
4292     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
4293     Glue = Chain.getValue(1);
4294   }
4295 
  // Validate that none of the argument registers have been marked as
  // reserved; if any have, report an error. Do the same for the return
  // address if this is not a tail call.
4299   validateCCReservedRegs(RegsToPass, MF);
4300   if (!IsTailCall &&
4301       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
4302     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
4303         MF.getFunction(),
4304         "Return address register required, but has been reserved."});
4305 
4306   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
4307   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
4308   // split it and then direct call can be matched by PseudoCALL.
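  // (Roughly: MO_CALL produces a plain `call sym`, while MO_PLT, used for
  // symbols that may not be DSO-local, produces `call sym@plt` so the call
  // can be routed through the PLT when linked into a shared object.)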
4309   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
4310     const GlobalValue *GV = S->getGlobal();
4311 
4312     unsigned OpFlags = RISCVII::MO_CALL;
4313     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
4314       OpFlags = RISCVII::MO_PLT;
4315 
4316     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
4317   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4318     unsigned OpFlags = RISCVII::MO_CALL;
4319 
4320     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
4321                                                  nullptr))
4322       OpFlags = RISCVII::MO_PLT;
4323 
4324     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
4325   }
4326 
4327   // The first call operand is the chain and the second is the target address.
4328   SmallVector<SDValue, 8> Ops;
4329   Ops.push_back(Chain);
4330   Ops.push_back(Callee);
4331 
4332   // Add argument registers to the end of the list so that they are
4333   // known live into the call.
4334   for (auto &Reg : RegsToPass)
4335     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
4336 
4337   if (!IsTailCall) {
4338     // Add a register mask operand representing the call-preserved registers.
4339     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4340     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
4341     assert(Mask && "Missing call preserved mask for calling convention");
4342     Ops.push_back(DAG.getRegisterMask(Mask));
4343   }
4344 
4345   // Glue the call to the argument copies, if any.
4346   if (Glue.getNode())
4347     Ops.push_back(Glue);
4348 
4349   // Emit the call.
4350   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4351 
4352   if (IsTailCall) {
4353     MF.getFrameInfo().setHasTailCall();
4354     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
4355   }
4356 
4357   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
4358   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4359   Glue = Chain.getValue(1);
4360 
4361   // Mark the end of the call, which is glued to the call itself.
4362   Chain = DAG.getCALLSEQ_END(Chain,
4363                              DAG.getConstant(NumBytes, DL, PtrVT, true),
4364                              DAG.getConstant(0, DL, PtrVT, true),
4365                              Glue, DL);
4366   Glue = Chain.getValue(1);
4367 
4368   // Assign locations to each value returned by this call.
4369   SmallVector<CCValAssign, 16> RVLocs;
4370   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
4371   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
4372 
4373   // Copy all of the result registers out of their specified physreg.
4374   for (auto &VA : RVLocs) {
4375     // Copy the value out
4376     SDValue RetValue =
4377         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
4378     // Glue the RetValue to the end of the call sequence
4379     Chain = RetValue.getValue(1);
4380     Glue = RetValue.getValue(2);
4381 
4382     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
4383       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
4384       SDValue RetValue2 =
4385           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
4386       Chain = RetValue2.getValue(1);
4387       Glue = RetValue2.getValue(2);
4388       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
4389                              RetValue2);
4390     }
4391 
4392     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
4393 
4394     InVals.push_back(RetValue);
4395   }
4396 
4397   return Chain;
4398 }
4399 
4400 bool RISCVTargetLowering::CanLowerReturn(
4401     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
4402     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4403   SmallVector<CCValAssign, 16> RVLocs;
4404   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
4405 
4406   Optional<unsigned> FirstMaskArgument;
4407   if (Subtarget.hasStdExtV())
4408     FirstMaskArgument = preAssignMask(Outs);
4409 
4410   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4411     MVT VT = Outs[i].VT;
4412     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4413     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
4414     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
4415                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
4416                  *this, FirstMaskArgument))
4417       return false;
4418   }
4419   return true;
4420 }
4421 
4422 SDValue
4423 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
4424                                  bool IsVarArg,
4425                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
4426                                  const SmallVectorImpl<SDValue> &OutVals,
4427                                  const SDLoc &DL, SelectionDAG &DAG) const {
4428   const MachineFunction &MF = DAG.getMachineFunction();
4429   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
4430 
4431   // Stores the assignment of the return value to a location.
4432   SmallVector<CCValAssign, 16> RVLocs;
4433 
4434   // Info about the registers and stack slot.
4435   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
4436                  *DAG.getContext());
4437 
4438   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
4439                     nullptr);
4440 
4441   if (CallConv == CallingConv::GHC && !RVLocs.empty())
4442     report_fatal_error("GHC functions return void only");
4443 
4444   SDValue Glue;
4445   SmallVector<SDValue, 4> RetOps(1, Chain);
4446 
4447   // Copy the result values into the output registers.
4448   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
4449     SDValue Val = OutVals[i];
4450     CCValAssign &VA = RVLocs[i];
4451     assert(VA.isRegLoc() && "Can only return in registers!");
4452 
4453     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
4454       // Handle returning f64 on RV32D with a soft float ABI.
4455       assert(VA.isRegLoc() && "Expected return via registers");
4456       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
4457                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
4458       SDValue Lo = SplitF64.getValue(0);
4459       SDValue Hi = SplitF64.getValue(1);
4460       Register RegLo = VA.getLocReg();
4461       assert(RegLo < RISCV::X31 && "Invalid register pair");
4462       Register RegHi = RegLo + 1;
4463 
4464       if (STI.isRegisterReservedByUser(RegLo) ||
4465           STI.isRegisterReservedByUser(RegHi))
4466         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
4467             MF.getFunction(),
4468             "Return value register required, but has been reserved."});
4469 
4470       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
4471       Glue = Chain.getValue(1);
4472       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
4473       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
4474       Glue = Chain.getValue(1);
4475       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
4476     } else {
4477       // Handle a 'normal' return.
4478       Val = convertValVTToLocVT(DAG, Val, VA, DL);
4479       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
4480 
4481       if (STI.isRegisterReservedByUser(VA.getLocReg()))
4482         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
4483             MF.getFunction(),
4484             "Return value register required, but has been reserved."});
4485 
4486       // Guarantee that all emitted copies are stuck together.
4487       Glue = Chain.getValue(1);
4488       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4489     }
4490   }
4491 
4492   RetOps[0] = Chain; // Update chain.
4493 
4494   // Add the glue node if we have it.
4495   if (Glue.getNode()) {
4496     RetOps.push_back(Glue);
4497   }
4498 
4499   // Interrupt service routines use different return instructions.
4500   const Function &Func = DAG.getMachineFunction().getFunction();
4501   if (Func.hasFnAttribute("interrupt")) {
4502     if (!Func.getReturnType()->isVoidTy())
4503       report_fatal_error(
4504           "Functions with the interrupt attribute must have void return type!");
4505 
4506     MachineFunction &MF = DAG.getMachineFunction();
4507     StringRef Kind =
4508       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
4509 
4510     unsigned RetOpc;
4511     if (Kind == "user")
4512       RetOpc = RISCVISD::URET_FLAG;
4513     else if (Kind == "supervisor")
4514       RetOpc = RISCVISD::SRET_FLAG;
4515     else
4516       RetOpc = RISCVISD::MRET_FLAG;
4517 
4518     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
4519   }
4520 
4521   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
4522 }
4523 
4524 void RISCVTargetLowering::validateCCReservedRegs(
4525     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
4526     MachineFunction &MF) const {
4527   const Function &F = MF.getFunction();
4528   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
4529 
4530   if (llvm::any_of(Regs, [&STI](auto Reg) {
4531         return STI.isRegisterReservedByUser(Reg.first);
4532       }))
4533     F.getContext().diagnose(DiagnosticInfoUnsupported{
4534         F, "Argument register required, but has been reserved."});
4535 }
4536 
4537 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
4538   return CI->isTailCall();
4539 }
4540 
4541 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
4542 #define NODE_NAME_CASE(NODE)                                                   \
4543   case RISCVISD::NODE:                                                         \
4544     return "RISCVISD::" #NODE;
4545   // clang-format off
4546   switch ((RISCVISD::NodeType)Opcode) {
4547   case RISCVISD::FIRST_NUMBER:
4548     break;
4549   NODE_NAME_CASE(RET_FLAG)
4550   NODE_NAME_CASE(URET_FLAG)
4551   NODE_NAME_CASE(SRET_FLAG)
4552   NODE_NAME_CASE(MRET_FLAG)
4553   NODE_NAME_CASE(CALL)
4554   NODE_NAME_CASE(SELECT_CC)
4555   NODE_NAME_CASE(BuildPairF64)
4556   NODE_NAME_CASE(SplitF64)
4557   NODE_NAME_CASE(TAIL)
4558   NODE_NAME_CASE(SLLW)
4559   NODE_NAME_CASE(SRAW)
4560   NODE_NAME_CASE(SRLW)
4561   NODE_NAME_CASE(DIVW)
4562   NODE_NAME_CASE(DIVUW)
4563   NODE_NAME_CASE(REMUW)
4564   NODE_NAME_CASE(ROLW)
4565   NODE_NAME_CASE(RORW)
4566   NODE_NAME_CASE(FSLW)
4567   NODE_NAME_CASE(FSRW)
4568   NODE_NAME_CASE(FSL)
4569   NODE_NAME_CASE(FSR)
4570   NODE_NAME_CASE(FMV_H_X)
4571   NODE_NAME_CASE(FMV_X_ANYEXTH)
4572   NODE_NAME_CASE(FMV_W_X_RV64)
4573   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
4574   NODE_NAME_CASE(READ_CYCLE_WIDE)
4575   NODE_NAME_CASE(GREVI)
4576   NODE_NAME_CASE(GREVIW)
4577   NODE_NAME_CASE(GORCI)
4578   NODE_NAME_CASE(GORCIW)
4579   NODE_NAME_CASE(VMV_V_X_VL)
4580   NODE_NAME_CASE(VFMV_V_F_VL)
4581   NODE_NAME_CASE(VMV_X_S)
4582   NODE_NAME_CASE(SPLAT_VECTOR_I64)
4583   NODE_NAME_CASE(READ_VLENB)
4584   NODE_NAME_CASE(TRUNCATE_VECTOR)
4585   NODE_NAME_CASE(VLEFF)
4586   NODE_NAME_CASE(VLEFF_MASK)
4587   NODE_NAME_CASE(VSLIDEUP)
4588   NODE_NAME_CASE(VSLIDEDOWN)
4589   NODE_NAME_CASE(VID)
4590   NODE_NAME_CASE(VFNCVT_ROD)
4591   NODE_NAME_CASE(VECREDUCE_ADD)
4592   NODE_NAME_CASE(VECREDUCE_UMAX)
4593   NODE_NAME_CASE(VECREDUCE_SMAX)
4594   NODE_NAME_CASE(VECREDUCE_UMIN)
4595   NODE_NAME_CASE(VECREDUCE_SMIN)
4596   NODE_NAME_CASE(VECREDUCE_AND)
4597   NODE_NAME_CASE(VECREDUCE_OR)
4598   NODE_NAME_CASE(VECREDUCE_XOR)
4599   NODE_NAME_CASE(VECREDUCE_FADD)
4600   NODE_NAME_CASE(VECREDUCE_SEQ_FADD)
4601   NODE_NAME_CASE(ADD_VL)
4602   NODE_NAME_CASE(AND_VL)
4603   NODE_NAME_CASE(MUL_VL)
4604   NODE_NAME_CASE(OR_VL)
4605   NODE_NAME_CASE(SDIV_VL)
4606   NODE_NAME_CASE(SHL_VL)
4607   NODE_NAME_CASE(SREM_VL)
4608   NODE_NAME_CASE(SRA_VL)
4609   NODE_NAME_CASE(SRL_VL)
4610   NODE_NAME_CASE(SUB_VL)
4611   NODE_NAME_CASE(UDIV_VL)
4612   NODE_NAME_CASE(UREM_VL)
4613   NODE_NAME_CASE(XOR_VL)
4614   NODE_NAME_CASE(FADD_VL)
4615   NODE_NAME_CASE(FSUB_VL)
4616   NODE_NAME_CASE(FMUL_VL)
4617   NODE_NAME_CASE(FDIV_VL)
4618   NODE_NAME_CASE(FNEG_VL)
4619   NODE_NAME_CASE(FMA_VL)
4620   NODE_NAME_CASE(VMCLR_VL)
4621   NODE_NAME_CASE(VMSET_VL)
4622   NODE_NAME_CASE(VLE_VL)
4623   NODE_NAME_CASE(VSE_VL)
4624   }
4625   // clang-format on
4626   return nullptr;
4627 #undef NODE_NAME_CASE
4628 }
4629 
4630 /// getConstraintType - Given a constraint letter, return the type of
4631 /// constraint it is for this target.
4632 RISCVTargetLowering::ConstraintType
4633 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
4634   if (Constraint.size() == 1) {
4635     switch (Constraint[0]) {
4636     default:
4637       break;
4638     case 'f':
4639       return C_RegisterClass;
4640     case 'I':
4641     case 'J':
4642     case 'K':
4643       return C_Immediate;
4644     case 'A':
4645       return C_Memory;
4646     }
4647   }
4648   return TargetLowering::getConstraintType(Constraint);
4649 }
4650 
4651 std::pair<unsigned, const TargetRegisterClass *>
4652 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
4653                                                   StringRef Constraint,
4654                                                   MVT VT) const {
4655   // First, see if this is a constraint that directly corresponds to a
4656   // RISCV register class.
4657   if (Constraint.size() == 1) {
4658     switch (Constraint[0]) {
4659     case 'r':
4660       return std::make_pair(0U, &RISCV::GPRRegClass);
4661     case 'f':
4662       if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
4663         return std::make_pair(0U, &RISCV::FPR16RegClass);
4664       if (Subtarget.hasStdExtF() && VT == MVT::f32)
4665         return std::make_pair(0U, &RISCV::FPR32RegClass);
4666       if (Subtarget.hasStdExtD() && VT == MVT::f64)
4667         return std::make_pair(0U, &RISCV::FPR64RegClass);
4668       break;
4669     default:
4670       break;
4671     }
4672   }
4673 
4674   // Clang will correctly decode the usage of register name aliases into their
4675   // official names. However, other frontends like `rustc` do not. This allows
4676   // users of these frontends to use the ABI names for registers in LLVM-style
4677   // register constraints.
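  // A hypothetical IR-level example of such a constraint (not taken from the
  // source tree):
  //   %res = call i32 asm "addi $0, $1, 1", "=r,{a0}"(i32 %x)
  // where "{a0}" resolves to X10 via the table below.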
4678   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
4679                                .Case("{zero}", RISCV::X0)
4680                                .Case("{ra}", RISCV::X1)
4681                                .Case("{sp}", RISCV::X2)
4682                                .Case("{gp}", RISCV::X3)
4683                                .Case("{tp}", RISCV::X4)
4684                                .Case("{t0}", RISCV::X5)
4685                                .Case("{t1}", RISCV::X6)
4686                                .Case("{t2}", RISCV::X7)
4687                                .Cases("{s0}", "{fp}", RISCV::X8)
4688                                .Case("{s1}", RISCV::X9)
4689                                .Case("{a0}", RISCV::X10)
4690                                .Case("{a1}", RISCV::X11)
4691                                .Case("{a2}", RISCV::X12)
4692                                .Case("{a3}", RISCV::X13)
4693                                .Case("{a4}", RISCV::X14)
4694                                .Case("{a5}", RISCV::X15)
4695                                .Case("{a6}", RISCV::X16)
4696                                .Case("{a7}", RISCV::X17)
4697                                .Case("{s2}", RISCV::X18)
4698                                .Case("{s3}", RISCV::X19)
4699                                .Case("{s4}", RISCV::X20)
4700                                .Case("{s5}", RISCV::X21)
4701                                .Case("{s6}", RISCV::X22)
4702                                .Case("{s7}", RISCV::X23)
4703                                .Case("{s8}", RISCV::X24)
4704                                .Case("{s9}", RISCV::X25)
4705                                .Case("{s10}", RISCV::X26)
4706                                .Case("{s11}", RISCV::X27)
4707                                .Case("{t3}", RISCV::X28)
4708                                .Case("{t4}", RISCV::X29)
4709                                .Case("{t5}", RISCV::X30)
4710                                .Case("{t6}", RISCV::X31)
4711                                .Default(RISCV::NoRegister);
4712   if (XRegFromAlias != RISCV::NoRegister)
4713     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
4714 
  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, and since we want to match those names to the widest
  // floating-point register type available, manually select floating-point
  // registers here.
4719   //
4720   // The second case is the ABI name of the register, so that frontends can also
4721   // use the ABI names in register constraint lists.
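  // For instance, a "{fa0}" (or "{f10}") constraint maps to F10_F below and
  // is widened to F10_D when the D extension is available.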
4722   if (Subtarget.hasStdExtF()) {
4723     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
4724                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
4725                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
4726                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
4727                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
4728                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
4729                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
4730                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
4731                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
4732                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
4733                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
4734                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
4735                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
4736                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
4737                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
4738                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
4739                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
4740                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
4741                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
4742                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
4743                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
4744                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
4745                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
4746                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
4747                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
4748                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
4749                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
4750                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
4751                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
4752                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
4753                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
4754                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
4755                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
4756                         .Default(RISCV::NoRegister);
4757     if (FReg != RISCV::NoRegister) {
4758       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
4759       if (Subtarget.hasStdExtD()) {
4760         unsigned RegNo = FReg - RISCV::F0_F;
4761         unsigned DReg = RISCV::F0_D + RegNo;
4762         return std::make_pair(DReg, &RISCV::FPR64RegClass);
4763       }
4764       return std::make_pair(FReg, &RISCV::FPR32RegClass);
4765     }
4766   }
4767 
4768   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
4769 }
4770 
4771 unsigned
4772 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
4773   // Currently only support length 1 constraints.
4774   if (ConstraintCode.size() == 1) {
4775     switch (ConstraintCode[0]) {
4776     case 'A':
4777       return InlineAsm::Constraint_A;
4778     default:
4779       break;
4780     }
4781   }
4782 
4783   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
4784 }
4785 
4786 void RISCVTargetLowering::LowerAsmOperandForConstraint(
4787     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
4788     SelectionDAG &DAG) const {
4789   // Currently only support length 1 constraints.
4790   if (Constraint.length() == 1) {
4791     switch (Constraint[0]) {
4792     case 'I':
4793       // Validate & create a 12-bit signed immediate operand.
4794       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4795         uint64_t CVal = C->getSExtValue();
4796         if (isInt<12>(CVal))
4797           Ops.push_back(
4798               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
4799       }
4800       return;
4801     case 'J':
4802       // Validate & create an integer zero operand.
4803       if (auto *C = dyn_cast<ConstantSDNode>(Op))
4804         if (C->getZExtValue() == 0)
4805           Ops.push_back(
4806               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
4807       return;
4808     case 'K':
4809       // Validate & create a 5-bit unsigned immediate operand.
4810       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4811         uint64_t CVal = C->getZExtValue();
4812         if (isUInt<5>(CVal))
4813           Ops.push_back(
4814               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
4815       }
4816       return;
4817     default:
4818       break;
4819     }
4820   }
4821   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
4822 }
4823 
4824 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
4825                                                    Instruction *Inst,
4826                                                    AtomicOrdering Ord) const {
4827   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
4828     return Builder.CreateFence(Ord);
4829   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
4830     return Builder.CreateFence(AtomicOrdering::Release);
4831   return nullptr;
4832 }
4833 
4834 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
4835                                                     Instruction *Inst,
4836                                                     AtomicOrdering Ord) const {
4837   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
4838     return Builder.CreateFence(AtomicOrdering::Acquire);
4839   return nullptr;
4840 }
4841 
4842 TargetLowering::AtomicExpansionKind
4843 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
4844   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
4845   // point operations can't be used in an lr/sc sequence without breaking the
4846   // forward-progress guarantee.
4847   if (AI->isFloatingPointOperation())
4848     return AtomicExpansionKind::CmpXChg;
4849 
4850   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
4851   if (Size == 8 || Size == 16)
4852     return AtomicExpansionKind::MaskedIntrinsic;
4853   return AtomicExpansionKind::None;
4854 }
4855 
4856 static Intrinsic::ID
4857 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
4858   if (XLen == 32) {
4859     switch (BinOp) {
4860     default:
4861       llvm_unreachable("Unexpected AtomicRMW BinOp");
4862     case AtomicRMWInst::Xchg:
4863       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
4864     case AtomicRMWInst::Add:
4865       return Intrinsic::riscv_masked_atomicrmw_add_i32;
4866     case AtomicRMWInst::Sub:
4867       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
4868     case AtomicRMWInst::Nand:
4869       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
4870     case AtomicRMWInst::Max:
4871       return Intrinsic::riscv_masked_atomicrmw_max_i32;
4872     case AtomicRMWInst::Min:
4873       return Intrinsic::riscv_masked_atomicrmw_min_i32;
4874     case AtomicRMWInst::UMax:
4875       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
4876     case AtomicRMWInst::UMin:
4877       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
4878     }
4879   }
4880 
4881   if (XLen == 64) {
4882     switch (BinOp) {
4883     default:
4884       llvm_unreachable("Unexpected AtomicRMW BinOp");
4885     case AtomicRMWInst::Xchg:
4886       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
4887     case AtomicRMWInst::Add:
4888       return Intrinsic::riscv_masked_atomicrmw_add_i64;
4889     case AtomicRMWInst::Sub:
4890       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
4891     case AtomicRMWInst::Nand:
4892       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
4893     case AtomicRMWInst::Max:
4894       return Intrinsic::riscv_masked_atomicrmw_max_i64;
4895     case AtomicRMWInst::Min:
4896       return Intrinsic::riscv_masked_atomicrmw_min_i64;
4897     case AtomicRMWInst::UMax:
4898       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
4899     case AtomicRMWInst::UMin:
4900       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
4901     }
4902   }
4903 
4904   llvm_unreachable("Unexpected XLen\n");
4905 }
4906 
4907 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
4908     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
4909     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
4910   unsigned XLen = Subtarget.getXLen();
4911   Value *Ordering =
4912       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
4913   Type *Tys[] = {AlignedAddr->getType()};
4914   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
4915       AI->getModule(),
4916       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
4917 
4918   if (XLen == 64) {
4919     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
4920     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4921     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
4922   }
4923 
4924   Value *Result;
4925 
4926   // Must pass the shift amount needed to sign extend the loaded value prior
4927   // to performing a signed comparison for min/max. ShiftAmt is the number of
4928   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
4929   // is the number of bits to left+right shift the value in order to
4930   // sign-extend.
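  // Worked example (illustrative): an i8 field at bit 16 on RV32 gives
  // SextShamt = 32 - 16 - 8 = 8, i.e. shift left by 8 and arithmetic-shift
  // right by 8 to sign-extend the field before the signed min/max compare.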
4931   if (AI->getOperation() == AtomicRMWInst::Min ||
4932       AI->getOperation() == AtomicRMWInst::Max) {
4933     const DataLayout &DL = AI->getModule()->getDataLayout();
4934     unsigned ValWidth =
4935         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
4936     Value *SextShamt =
4937         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
4938     Result = Builder.CreateCall(LrwOpScwLoop,
4939                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
4940   } else {
4941     Result =
4942         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
4943   }
4944 
4945   if (XLen == 64)
4946     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4947   return Result;
4948 }
4949 
4950 TargetLowering::AtomicExpansionKind
4951 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
4952     AtomicCmpXchgInst *CI) const {
4953   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
4954   if (Size == 8 || Size == 16)
4955     return AtomicExpansionKind::MaskedIntrinsic;
4956   return AtomicExpansionKind::None;
4957 }
4958 
4959 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
4960     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
4961     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
4962   unsigned XLen = Subtarget.getXLen();
4963   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
4964   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
4965   if (XLen == 64) {
4966     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
4967     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
4968     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4969     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
4970   }
4971   Type *Tys[] = {AlignedAddr->getType()};
4972   Function *MaskedCmpXchg =
4973       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
4974   Value *Result = Builder.CreateCall(
4975       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
4976   if (XLen == 64)
4977     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4978   return Result;
4979 }
4980 
4981 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
4982                                                      EVT VT) const {
4983   VT = VT.getScalarType();
4984 
4985   if (!VT.isSimple())
4986     return false;
4987 
4988   switch (VT.getSimpleVT().SimpleTy) {
4989   case MVT::f16:
4990     return Subtarget.hasStdExtZfh();
4991   case MVT::f32:
4992     return Subtarget.hasStdExtF();
4993   case MVT::f64:
4994     return Subtarget.hasStdExtD();
4995   default:
4996     break;
4997   }
4998 
4999   return false;
5000 }
5001 
5002 Register RISCVTargetLowering::getExceptionPointerRegister(
5003     const Constant *PersonalityFn) const {
5004   return RISCV::X10;
5005 }
5006 
5007 Register RISCVTargetLowering::getExceptionSelectorRegister(
5008     const Constant *PersonalityFn) const {
5009   return RISCV::X11;
5010 }
5011 
5012 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress unnecessary extensions when a libcall argument
  // or return value is of f32 type under the LP64 ABI.
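  // For instance, on LP64 an f32 libcall argument can be passed as-is rather
  // than being extended to a 64-bit value first.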
5015   RISCVABI::ABI ABI = Subtarget.getTargetABI();
5016   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
5017     return false;
5018 
5019   return true;
5020 }
5021 
5022 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
5023   if (Subtarget.is64Bit() && Type == MVT::i32)
5024     return true;
5025 
5026   return IsSigned;
5027 }
5028 
5029 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5030                                                  SDValue C) const {
5031   // Check integral scalar types.
5032   if (VT.isScalarInteger()) {
    // Omit the optimization if the subtarget has the M extension and the data
    // size exceeds XLen.
5035     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
5036       return false;
5037     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
5038       // Break the MUL to a SLLI and an ADD/SUB.
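      // E.g. x * 9 -> (x << 3) + x, x * 7 -> (x << 3) - x,
      // and x * -7 -> x - (x << 3).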
5039       const APInt &Imm = ConstNode->getAPIntValue();
5040       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
5041           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
5042         return true;
      // Omit the following optimization if the subtarget has the M extension
      // and the data size >= XLen.
5045       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
5046         return false;
5047       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
5048       // a pair of LUI/ADDI.
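      // E.g. x * 4352 (0x1100, so ImmS == 17) -> ((x << 4) + x) << 8, which
      // avoids materialising the constant with LUI/ADDI.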
5049       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
5050         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
5051         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
5052             (1 - ImmS).isPowerOf2())
          return true;
5054       }
5055     }
5056   }
5057 
5058   return false;
5059 }
5060 
5061 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
5062   if (!Subtarget.useRVVForFixedLengthVectors())
5063     return false;
5064 
5065   if (!VT.isFixedLengthVector())
5066     return false;
5067 
5068   // Don't use RVV for vectors we cannot scalarize if required.
5069   switch (VT.getVectorElementType().SimpleTy) {
5070   default:
5071     return false;
5072   case MVT::i1:
5073   case MVT::i8:
5074   case MVT::i16:
5075   case MVT::i32:
5076   case MVT::i64:
5077     break;
5078   case MVT::f16:
5079     if (!Subtarget.hasStdExtZfh())
5080       return false;
5081     break;
5082   case MVT::f32:
5083     if (!Subtarget.hasStdExtF())
5084       return false;
5085     break;
5086   case MVT::f64:
5087     if (!Subtarget.hasStdExtD())
5088       return false;
5089     break;
5090   }
5091 
5092   unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
5093   // Don't use RVV for types that don't fit.
5094   if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
5095     return false;
5096 
5097   // TODO: Perhaps an artificial restriction, but worth having whilst getting
5098   // the base fixed length RVV support in place.
5099   if (!VT.isPow2VectorType())
5100     return false;
5101 
5102   return true;
5103 }
5104 
5105 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
5106     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
5107     bool *Fast) const {
5108   if (!VT.isScalableVector())
5109     return false;
5110 
5111   EVT ElemVT = VT.getVectorElementType();
5112   if (Alignment >= ElemVT.getStoreSize()) {
5113     if (Fast)
5114       *Fast = true;
5115     return true;
5116   }
5117 
5118   return false;
5119 }
5120 
5121 #define GET_REGISTER_MATCHER
5122 #include "RISCVGenAsmMatcher.inc"
5123 
5124 Register
5125 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
5126                                        const MachineFunction &MF) const {
5127   Register Reg = MatchRegisterAltName(RegName);
5128   if (Reg == RISCV::NoRegister)
5129     Reg = MatchRegisterName(RegName);
5130   if (Reg == RISCV::NoRegister)
5131     report_fatal_error(
5132         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
5133   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
5134   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
5135     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
5136                              StringRef(RegName) + "\"."));
5137   return Reg;
5138 }
5139 
5140 namespace llvm {
5141 namespace RISCVVIntrinsicsTable {
5142 
5143 #define GET_RISCVVIntrinsicsTable_IMPL
5144 #include "RISCVGenSearchableTables.inc"
5145 
5146 } // namespace RISCVVIntrinsicsTable
5147 
5148 namespace RISCVZvlssegTable {
5149 
5150 #define GET_RISCVZvlssegTable_IMPL
5151 #include "RISCVGenSearchableTables.inc"
5152 
5153 } // namespace RISCVZvlssegTable
5154 } // namespace llvm
5155