1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/IR/IntrinsicsRISCV.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "riscv-lower"
42 
43 STATISTIC(NumTailCalls, "Number of tail calls");
44 
45 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
46                                          const RISCVSubtarget &STI)
47     : TargetLowering(TM), Subtarget(STI) {
48 
49   if (Subtarget.isRV32E())
50     report_fatal_error("Codegen not yet implemented for RV32E");
51 
52   RISCVABI::ABI ABI = Subtarget.getTargetABI();
53   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
54 
55   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
56       !Subtarget.hasStdExtF()) {
57     errs() << "Hard-float 'f' ABI can't be used for a target that "
58                 "doesn't support the F instruction set extension (ignoring "
59                           "target-abi)\n";
60     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
61   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
62              !Subtarget.hasStdExtD()) {
63     errs() << "Hard-float 'd' ABI can't be used for a target that "
64               "doesn't support the D instruction set extension (ignoring "
65               "target-abi)\n";
66     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
67   }
68 
69   switch (ABI) {
70   default:
71     report_fatal_error("Don't know how to lower this ABI");
72   case RISCVABI::ABI_ILP32:
73   case RISCVABI::ABI_ILP32F:
74   case RISCVABI::ABI_ILP32D:
75   case RISCVABI::ABI_LP64:
76   case RISCVABI::ABI_LP64F:
77   case RISCVABI::ABI_LP64D:
78     break;
79   }
80 
81   MVT XLenVT = Subtarget.getXLenVT();
82 
83   // Set up the register classes.
84   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
85 
86   if (Subtarget.hasStdExtZfh())
87     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
88   if (Subtarget.hasStdExtF())
89     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
90   if (Subtarget.hasStdExtD())
91     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
92 
93   static const MVT::SimpleValueType BoolVecVTs[] = {
94       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
95       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
96   static const MVT::SimpleValueType IntVecVTs[] = {
97       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
98       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
99       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
100       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
101       MVT::nxv4i64, MVT::nxv8i64};
102   static const MVT::SimpleValueType F16VecVTs[] = {
103       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
104       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
105   static const MVT::SimpleValueType F32VecVTs[] = {
106       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
107   static const MVT::SimpleValueType F64VecVTs[] = {
108       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
109 
110   if (Subtarget.hasStdExtV()) {
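    // Scalable vector types are assigned a vector register class according to
    // their known minimum size: up to 64 bits fits in a single vector
    // register (VR, LMUL <= 1), while 128/256/512-bit types use the
    // VRM2/VRM4/VRM8 register-group classes (LMUL = 2/4/8).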
111     auto addRegClassForRVV = [this](MVT VT) {
112       unsigned Size = VT.getSizeInBits().getKnownMinValue();
113       assert(Size <= 512 && isPowerOf2_32(Size));
114       const TargetRegisterClass *RC;
115       if (Size <= 64)
116         RC = &RISCV::VRRegClass;
117       else if (Size == 128)
118         RC = &RISCV::VRM2RegClass;
119       else if (Size == 256)
120         RC = &RISCV::VRM4RegClass;
121       else
122         RC = &RISCV::VRM8RegClass;
123 
124       addRegisterClass(VT, RC);
125     };
126 
127     for (MVT VT : BoolVecVTs)
128       addRegClassForRVV(VT);
129     for (MVT VT : IntVecVTs)
130       addRegClassForRVV(VT);
131 
132     if (Subtarget.hasStdExtZfh())
133       for (MVT VT : F16VecVTs)
134         addRegClassForRVV(VT);
135 
136     if (Subtarget.hasStdExtF())
137       for (MVT VT : F32VecVTs)
138         addRegClassForRVV(VT);
139 
140     if (Subtarget.hasStdExtD())
141       for (MVT VT : F64VecVTs)
142         addRegClassForRVV(VT);
143 
144     if (Subtarget.useRVVForFixedLengthVectors()) {
145       auto addRegClassForFixedVectors = [this](MVT VT) {
146         unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
147         const TargetRegisterClass *RC;
148         if (LMul == 1)
149           RC = &RISCV::VRRegClass;
150         else if (LMul == 2)
151           RC = &RISCV::VRM2RegClass;
152         else if (LMul == 4)
153           RC = &RISCV::VRM4RegClass;
154         else if (LMul == 8)
155           RC = &RISCV::VRM8RegClass;
156         else
157           llvm_unreachable("Unexpected LMul!");
158 
159         addRegisterClass(VT, RC);
160       };
161       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
162         if (useRVVForFixedLengthVectorVT(VT))
163           addRegClassForFixedVectors(VT);
164 
165       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
166         if (useRVVForFixedLengthVectorVT(VT))
167           addRegClassForFixedVectors(VT);
168     }
169   }
170 
171   // Compute derived properties from the register classes.
172   computeRegisterProperties(STI.getRegisterInfo());
173 
174   setStackPointerRegisterToSaveRestore(RISCV::X2);
175 
176   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
177     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
178 
179   // TODO: add all necessary setOperationAction calls.
180   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
181 
182   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
183   setOperationAction(ISD::BR_CC, XLenVT, Expand);
184   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
185 
186   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
187   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
188 
189   setOperationAction(ISD::VASTART, MVT::Other, Custom);
190   setOperationAction(ISD::VAARG, MVT::Other, Expand);
191   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
192   setOperationAction(ISD::VAEND, MVT::Other, Expand);
193 
194   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
195   if (!Subtarget.hasStdExtZbb()) {
196     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
197     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
198   }
199 
200   if (Subtarget.is64Bit()) {
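    // i32 is not a legal type on RV64. Custom-legalize these operations so
    // they can use the corresponding *W instructions (ADDW, SLLW, ...), which
    // sign-extend their 32-bit results to 64 bits.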
201     setOperationAction(ISD::ADD, MVT::i32, Custom);
202     setOperationAction(ISD::SUB, MVT::i32, Custom);
203     setOperationAction(ISD::SHL, MVT::i32, Custom);
204     setOperationAction(ISD::SRA, MVT::i32, Custom);
205     setOperationAction(ISD::SRL, MVT::i32, Custom);
206   }
207 
208   if (!Subtarget.hasStdExtM()) {
209     setOperationAction(ISD::MUL, XLenVT, Expand);
210     setOperationAction(ISD::MULHS, XLenVT, Expand);
211     setOperationAction(ISD::MULHU, XLenVT, Expand);
212     setOperationAction(ISD::SDIV, XLenVT, Expand);
213     setOperationAction(ISD::UDIV, XLenVT, Expand);
214     setOperationAction(ISD::SREM, XLenVT, Expand);
215     setOperationAction(ISD::UREM, XLenVT, Expand);
216   }
217 
218   if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
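    // With M on RV64, custom-legalize narrow multiplies and divides so they
    // can use the 32-bit W-form instructions (MULW, DIVW, DIVUW, REMUW) after
    // extending their operands appropriately.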
219     setOperationAction(ISD::MUL, MVT::i32, Custom);
220 
221     setOperationAction(ISD::SDIV, MVT::i8, Custom);
222     setOperationAction(ISD::UDIV, MVT::i8, Custom);
223     setOperationAction(ISD::UREM, MVT::i8, Custom);
224     setOperationAction(ISD::SDIV, MVT::i16, Custom);
225     setOperationAction(ISD::UDIV, MVT::i16, Custom);
226     setOperationAction(ISD::UREM, MVT::i16, Custom);
227     setOperationAction(ISD::SDIV, MVT::i32, Custom);
228     setOperationAction(ISD::UDIV, MVT::i32, Custom);
229     setOperationAction(ISD::UREM, MVT::i32, Custom);
230   }
231 
232   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
233   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
234   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
235   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
236 
237   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
238   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
239   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
240 
241   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
242     if (Subtarget.is64Bit()) {
243       setOperationAction(ISD::ROTL, MVT::i32, Custom);
244       setOperationAction(ISD::ROTR, MVT::i32, Custom);
245     }
246   } else {
247     setOperationAction(ISD::ROTL, XLenVT, Expand);
248     setOperationAction(ISD::ROTR, XLenVT, Expand);
249   }
250 
251   if (Subtarget.hasStdExtZbp()) {
252     // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
253     // more combining.
254     setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
255     setOperationAction(ISD::BSWAP, XLenVT, Custom);
256 
257     if (Subtarget.is64Bit()) {
258       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
259       setOperationAction(ISD::BSWAP, MVT::i32, Custom);
260     }
261   } else {
262     // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
263     // pattern match it directly in isel.
264     setOperationAction(ISD::BSWAP, XLenVT,
265                        Subtarget.hasStdExtZbb() ? Legal : Expand);
266   }
267 
268   if (Subtarget.hasStdExtZbb()) {
269     setOperationAction(ISD::SMIN, XLenVT, Legal);
270     setOperationAction(ISD::SMAX, XLenVT, Legal);
271     setOperationAction(ISD::UMIN, XLenVT, Legal);
272     setOperationAction(ISD::UMAX, XLenVT, Legal);
273   } else {
274     setOperationAction(ISD::CTTZ, XLenVT, Expand);
275     setOperationAction(ISD::CTLZ, XLenVT, Expand);
276     setOperationAction(ISD::CTPOP, XLenVT, Expand);
277   }
278 
279   if (Subtarget.hasStdExtZbt()) {
280     setOperationAction(ISD::FSHL, XLenVT, Custom);
281     setOperationAction(ISD::FSHR, XLenVT, Custom);
282     setOperationAction(ISD::SELECT, XLenVT, Legal);
283 
284     if (Subtarget.is64Bit()) {
285       setOperationAction(ISD::FSHL, MVT::i32, Custom);
286       setOperationAction(ISD::FSHR, MVT::i32, Custom);
287     }
288   } else {
289     setOperationAction(ISD::SELECT, XLenVT, Custom);
290   }
291 
292   ISD::CondCode FPCCToExpand[] = {
293       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
294       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
295       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
296 
297   ISD::NodeType FPOpToExpand[] = {
298       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
299       ISD::FP_TO_FP16};
300 
  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
305     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
306     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
307     for (auto CC : FPCCToExpand)
308       setCondCodeAction(CC, MVT::f16, Expand);
309     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
310     setOperationAction(ISD::SELECT, MVT::f16, Custom);
311     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
312     for (auto Op : FPOpToExpand)
313       setOperationAction(Op, MVT::f16, Expand);
314   }
315 
316   if (Subtarget.hasStdExtF()) {
317     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
318     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
319     for (auto CC : FPCCToExpand)
320       setCondCodeAction(CC, MVT::f32, Expand);
321     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
322     setOperationAction(ISD::SELECT, MVT::f32, Custom);
323     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
324     for (auto Op : FPOpToExpand)
325       setOperationAction(Op, MVT::f32, Expand);
326     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
327     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
328   }
329 
330   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
331     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
332 
333   if (Subtarget.hasStdExtD()) {
334     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
335     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
336     for (auto CC : FPCCToExpand)
337       setCondCodeAction(CC, MVT::f64, Expand);
338     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
339     setOperationAction(ISD::SELECT, MVT::f64, Custom);
340     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
341     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
342     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
343     for (auto Op : FPOpToExpand)
344       setOperationAction(Op, MVT::f64, Expand);
345     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
346     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
347   }
348 
349   if (Subtarget.is64Bit()) {
350     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
351     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
352     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
353     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
354   }
355 
356   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
357   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
358   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
359   setOperationAction(ISD::JumpTable, XLenVT, Custom);
360 
361   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
362 
363   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
364   // Unfortunately this can't be determined just from the ISA naming string.
365   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
366                      Subtarget.is64Bit() ? Legal : Custom);
367 
368   setOperationAction(ISD::TRAP, MVT::Other, Legal);
369   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
370   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
371 
372   if (Subtarget.hasStdExtA()) {
373     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
374     setMinCmpXchgSizeInBits(32);
375   } else {
376     setMaxAtomicSizeInBitsSupported(0);
377   }
378 
379   setBooleanContents(ZeroOrOneBooleanContent);
380 
381   if (Subtarget.hasStdExtV()) {
382     setBooleanVectorContents(ZeroOrOneBooleanContent);
383 
384     setOperationAction(ISD::VSCALE, XLenVT, Custom);
385 
386     // RVV intrinsics may have illegal operands.
387     // We also need to custom legalize vmv.x.s.
388     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
389     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
390     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
391     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
392     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
393     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
394 
395     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
396 
397     if (Subtarget.is64Bit()) {
398       setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
399       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
400     } else {
401       // We must custom-lower certain vXi64 operations on RV32 due to the vector
402       // element type being illegal.
403       setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
404       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
405       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
406 
407       setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
408       setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
409       setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
410       setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
411       setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
412       setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
413       setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
414       setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
415     }
416 
417     for (MVT VT : BoolVecVTs) {
418       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
419 
420       // Mask VTs are custom-expanded into a series of standard nodes
421       setOperationAction(ISD::TRUNCATE, VT, Custom);
422     }
423 
424     for (MVT VT : IntVecVTs) {
425       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
426 
427       setOperationAction(ISD::SMIN, VT, Legal);
428       setOperationAction(ISD::SMAX, VT, Legal);
429       setOperationAction(ISD::UMIN, VT, Legal);
430       setOperationAction(ISD::UMAX, VT, Legal);
431 
432       setOperationAction(ISD::ROTL, VT, Expand);
433       setOperationAction(ISD::ROTR, VT, Expand);
434 
435       // Custom-lower extensions and truncations from/to mask types.
436       setOperationAction(ISD::ANY_EXTEND, VT, Custom);
437       setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
438       setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
439 
440       // RVV has native int->float & float->int conversions where the
441       // element type sizes are within one power-of-two of each other. Any
442       // wider distances between type sizes have to be lowered as sequences
443       // which progressively narrow the gap in stages.
444       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
445       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
446       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
447       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
448 
449       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR"
450       // nodes which truncate by one power of two at a time.
451       setOperationAction(ISD::TRUNCATE, VT, Custom);
452 
453       // Custom-lower insert/extract operations to simplify patterns.
454       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
455       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
456 
457       // Custom-lower reduction operations to set up the corresponding custom
458       // nodes' operands.
459       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
460       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
461       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
462       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
463       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
464       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
465       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
466       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
467     }
468 
469     // Expand various CCs to best match the RVV ISA, which natively supports UNE
470     // but no other unordered comparisons, and supports all ordered comparisons
471     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
472     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
473     // and we pattern-match those back to the "original", swapping operands once
474     // more. This way we catch both operations and both "vf" and "fv" forms with
475     // fewer patterns.
476     ISD::CondCode VFPCCToExpand[] = {
477         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
478         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
479         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
480     };
481 
482     // Sets common operation actions on RVV floating-point vector types.
483     const auto SetCommonVFPActions = [&](MVT VT) {
484       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
485       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
486       // sizes are within one power-of-two of each other. Therefore conversions
487       // between vXf16 and vXf64 must be lowered as sequences which convert via
488       // vXf32.
489       setOperationAction(ISD::FP_ROUND, VT, Custom);
490       setOperationAction(ISD::FP_EXTEND, VT, Custom);
491       // Custom-lower insert/extract operations to simplify patterns.
492       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
493       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
494       // Expand various condition codes (explained above).
495       for (auto CC : VFPCCToExpand)
496         setCondCodeAction(CC, VT, Expand);
497 
498       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
499       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
500     };
501 
502     if (Subtarget.hasStdExtZfh())
503       for (MVT VT : F16VecVTs)
504         SetCommonVFPActions(VT);
505 
506     if (Subtarget.hasStdExtF())
507       for (MVT VT : F32VecVTs)
508         SetCommonVFPActions(VT);
509 
510     if (Subtarget.hasStdExtD())
511       for (MVT VT : F64VecVTs)
512         SetCommonVFPActions(VT);
513 
514     if (Subtarget.useRVVForFixedLengthVectors()) {
515       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
516         if (!useRVVForFixedLengthVectorVT(VT))
517           continue;
518 
519         // By default everything must be expanded.
520         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
521           setOperationAction(Op, VT, Expand);
522 
523         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
524         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
525 
526         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
527 
528         setOperationAction(ISD::LOAD, VT, Custom);
529         setOperationAction(ISD::STORE, VT, Custom);
530 
531         // Operations below are not valid for masks.
532         if (VT.getVectorElementType() == MVT::i1)
533           continue;
534 
535         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
536 
537         setOperationAction(ISD::ADD, VT, Custom);
538         setOperationAction(ISD::MUL, VT, Custom);
539         setOperationAction(ISD::SUB, VT, Custom);
540         setOperationAction(ISD::AND, VT, Custom);
541         setOperationAction(ISD::OR, VT, Custom);
542         setOperationAction(ISD::XOR, VT, Custom);
543         setOperationAction(ISD::SDIV, VT, Custom);
544         setOperationAction(ISD::SREM, VT, Custom);
545         setOperationAction(ISD::UDIV, VT, Custom);
546         setOperationAction(ISD::UREM, VT, Custom);
547         setOperationAction(ISD::SHL, VT, Custom);
548         setOperationAction(ISD::SRA, VT, Custom);
549         setOperationAction(ISD::SRL, VT, Custom);
550       }
551 
552       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
553         if (!useRVVForFixedLengthVectorVT(VT))
554           continue;
555 
556         // By default everything must be expanded.
557         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
558           setOperationAction(Op, VT, Expand);
559 
560         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
561         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
562 
563         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
564         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
565 
566         setOperationAction(ISD::LOAD, VT, Custom);
567         setOperationAction(ISD::STORE, VT, Custom);
568         setOperationAction(ISD::FADD, VT, Custom);
569         setOperationAction(ISD::FSUB, VT, Custom);
570         setOperationAction(ISD::FMUL, VT, Custom);
571         setOperationAction(ISD::FDIV, VT, Custom);
572         setOperationAction(ISD::FNEG, VT, Custom);
573         setOperationAction(ISD::FMA, VT, Custom);
574       }
575     }
576   }
577 
578   // Function alignments.
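  // With the C extension, instructions can be 2 bytes long, so functions only
  // need 2-byte alignment; otherwise every instruction is 4 bytes.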
579   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
580   setMinFunctionAlignment(FunctionAlignment);
581   setPrefFunctionAlignment(FunctionAlignment);
582 
583   setMinimumJumpTableEntries(5);
584 
  // Jumps are expensive compared to logic operations.
586   setJumpIsExpensive();
587 
588   // We can use any register for comparisons
589   setHasMultipleConditionRegisters();
590 
591   setTargetDAGCombine(ISD::SETCC);
592   if (Subtarget.hasStdExtZbp()) {
593     setTargetDAGCombine(ISD::OR);
594   }
595 }
596 
597 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
598                                             LLVMContext &Context,
599                                             EVT VT) const {
600   if (!VT.isVector())
601     return getPointerTy(DL);
602   if (Subtarget.hasStdExtV() && VT.isScalableVector())
603     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
604   return VT.changeVectorElementTypeToInteger();
605 }
606 
607 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
608                                              const CallInst &I,
609                                              MachineFunction &MF,
610                                              unsigned Intrinsic) const {
611   switch (Intrinsic) {
612   default:
613     return false;
614   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
615   case Intrinsic::riscv_masked_atomicrmw_add_i32:
616   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
617   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
618   case Intrinsic::riscv_masked_atomicrmw_max_i32:
619   case Intrinsic::riscv_masked_atomicrmw_min_i32:
620   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
621   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32: {
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
  }
633 }
634 
635 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
636                                                 const AddrMode &AM, Type *Ty,
637                                                 unsigned AS,
638                                                 Instruction *I) const {
639   // No global is ever allowed as a base.
640   if (AM.BaseGV)
641     return false;
642 
643   // Require a 12-bit signed offset.
644   if (!isInt<12>(AM.BaseOffs))
645     return false;
646 
647   switch (AM.Scale) {
648   case 0: // "r+i" or just "i", depending on HasBaseReg.
649     break;
650   case 1:
651     if (!AM.HasBaseReg) // allow "r+i".
652       break;
653     return false; // disallow "r+r" or "r+r+i".
654   default:
655     return false;
656   }
657 
658   return true;
659 }
660 
661 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
662   return isInt<12>(Imm);
663 }
664 
665 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
666   return isInt<12>(Imm);
667 }
668 
669 // On RV32, 64-bit integers are split into their high and low parts and held
670 // in two different registers, so the trunc is free since the low register can
671 // just be used.
672 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
673   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
674     return false;
675   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
676   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
677   return (SrcBits == 64 && DestBits == 32);
678 }
679 
680 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
681   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
682       !SrcVT.isInteger() || !DstVT.isInteger())
683     return false;
684   unsigned SrcBits = SrcVT.getSizeInBits();
685   unsigned DestBits = DstVT.getSizeInBits();
686   return (SrcBits == 64 && DestBits == 32);
687 }
688 
689 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
690   // Zexts are free if they can be combined with a load.
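  // A zero-extending i8/i16 load (or i32 load on RV64) can be selected as a
  // single LBU/LHU/LWU, so the extension costs nothing.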
691   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
692     EVT MemVT = LD->getMemoryVT();
693     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
694          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
695         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
696          LD->getExtensionType() == ISD::ZEXTLOAD))
697       return true;
698   }
699 
700   return TargetLowering::isZExtFree(Val, VT2);
701 }
702 
703 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
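  // On RV64, W-form instructions already produce results sign-extended from
  // 32 bits, so an i32 -> i64 sign extension is usually free, whereas a zero
  // extension generally needs extra instructions.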
704   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
705 }
706 
707 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
708   return Subtarget.hasStdExtZbb();
709 }
710 
711 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
712   return Subtarget.hasStdExtZbb();
713 }
714 
715 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
716                                        bool ForCodeSize) const {
717   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
718     return false;
719   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
720     return false;
721   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
722     return false;
723   if (Imm.isNegZero())
724     return false;
725   return Imm.isZero();
726 }
727 
728 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
729   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
730          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
731          (VT == MVT::f64 && Subtarget.hasStdExtD());
732 }
733 
734 // Changes the condition code and swaps operands if necessary, so the SetCC
735 // operation matches one of the comparisons supported directly in the RISC-V
736 // ISA.
737 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
738   switch (CC) {
739   default:
740     break;
741   case ISD::SETGT:
742   case ISD::SETLE:
743   case ISD::SETUGT:
744   case ISD::SETULE:
745     CC = ISD::getSetCCSwappedOperands(CC);
746     std::swap(LHS, RHS);
747     break;
748   }
749 }
750 
751 // Return the RISC-V branch opcode that matches the given DAG integer
752 // condition code. The CondCode must be one of those supported by the RISC-V
753 // ISA (see normaliseSetCC).
754 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
755   switch (CC) {
756   default:
757     llvm_unreachable("Unsupported CondCode");
758   case ISD::SETEQ:
759     return RISCV::BEQ;
760   case ISD::SETNE:
761     return RISCV::BNE;
762   case ISD::SETLT:
763     return RISCV::BLT;
764   case ISD::SETGE:
765     return RISCV::BGE;
766   case ISD::SETULT:
767     return RISCV::BLTU;
768   case ISD::SETUGE:
769     return RISCV::BGEU;
770   }
771 }
772 
// Return the scalable vector container type to use when lowering the given
// fixed-length vector type VT, sized according to the LMUL the subtarget
// assigns to VT.
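// For example, with i32 elements (RVVBitsPerBlock = 64, so two elements per
// block) and LMUL = 2 the container type is nxv4i32.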
774 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
775                                             const RISCVSubtarget &Subtarget) {
776   assert(VT.isFixedLengthVector() &&
777          DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
778          "Expected legal fixed length vector!");
779 
780   unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
781   assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");
782 
783   MVT EltVT = VT.getVectorElementType();
784   switch (EltVT.SimpleTy) {
785   default:
786     llvm_unreachable("unexpected element type for RVV container");
787   case MVT::i1: {
788     // Masks are calculated assuming 8-bit elements since that's when we need
789     // the most elements.
790     unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
791     return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
792   }
793   case MVT::i8:
794   case MVT::i16:
795   case MVT::i32:
796   case MVT::i64:
797   case MVT::f16:
798   case MVT::f32:
799   case MVT::f64: {
800     unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
801     return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
802   }
803   }
804 }
805 
806 // Grow V to consume an entire RVV register.
807 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
808                                        const RISCVSubtarget &Subtarget) {
809   assert(VT.isScalableVector() &&
810          "Expected to convert into a scalable vector!");
811   assert(V.getValueType().isFixedLengthVector() &&
812          "Expected a fixed length vector operand!");
813   SDLoc DL(V);
814   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
815   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
816 }
817 
818 // Shrink V so it's just big enough to maintain a VT's worth of data.
819 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
820                                          const RISCVSubtarget &Subtarget) {
821   assert(VT.isFixedLengthVector() &&
822          "Expected to convert into a fixed length vector!");
823   assert(V.getValueType().isScalableVector() &&
824          "Expected a scalable vector operand!");
825   SDLoc DL(V);
826   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
827   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
828 }
829 
830 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
831                                  const RISCVSubtarget &Subtarget) {
832   MVT VT = Op.getSimpleValueType();
833   assert(VT.isFixedLengthVector() && "Unexpected vector!");
834 
835   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
836 
837   SDLoc DL(Op);
838   SDValue VL =
839       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
840 
841   if (VT.getVectorElementType() == MVT::i1) {
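    // Only all-zeros and all-ones i1 build_vectors are handled here; they map
    // directly to vmclr.m and vmset.m respectively.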
842     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
843       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
844       return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
845     }
846 
847     if (ISD::isBuildVectorAllOnes(Op.getNode())) {
848       SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
849       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
850     }
851 
852     return SDValue();
853   }
854 
855   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
856     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
857                                         : RISCVISD::VMV_V_X_VL;
858     Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
859     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
860   }
861 
  // Try to match an index sequence, which we can lower directly to the vid
  // instruction. An all-undef vector is matched by getSplatValue, above.
864   bool IsVID = true;
865   if (VT.isInteger())
866     for (unsigned i = 0, e = Op.getNumOperands(); i < e && IsVID; i++)
867       IsVID &= Op.getOperand(i).isUndef() ||
868                (isa<ConstantSDNode>(Op.getOperand(i)) &&
869                 Op.getConstantOperandVal(i) == i);
870 
871   if (IsVID) {
872     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
873     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
874     SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
875     return convertFromScalableVector(VT, VID, DAG, Subtarget);
876   }
877 
878   return SDValue();
879 }
880 
881 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
882                                    const RISCVSubtarget &Subtarget) {
883   SDValue V1 = Op.getOperand(0);
884   SDLoc DL(Op);
885   MVT VT = Op.getSimpleValueType();
886   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
887 
888   if (SVN->isSplat()) {
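    // A shuffle that splats a single source lane is lowered to vrgather.vx,
    // broadcasting element 'Lane' of the container-widened source vector.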
889     int Lane = SVN->getSplatIndex();
890     if (Lane >= 0) {
891       MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
892 
893       V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
894       assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!");
895 
896       MVT XLenVT = Subtarget.getXLenVT();
897       SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
898       MVT MaskVT =
899           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
900       SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
901       SDValue Gather =
902           DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
903                       DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
904       return convertFromScalableVector(VT, Gather, DAG, Subtarget);
905     }
906   }
907 
908   return SDValue();
909 }
910 
911 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
912                                             SelectionDAG &DAG) const {
913   switch (Op.getOpcode()) {
914   default:
915     report_fatal_error("unimplemented operand");
916   case ISD::GlobalAddress:
917     return lowerGlobalAddress(Op, DAG);
918   case ISD::BlockAddress:
919     return lowerBlockAddress(Op, DAG);
920   case ISD::ConstantPool:
921     return lowerConstantPool(Op, DAG);
922   case ISD::JumpTable:
923     return lowerJumpTable(Op, DAG);
924   case ISD::GlobalTLSAddress:
925     return lowerGlobalTLSAddress(Op, DAG);
926   case ISD::SELECT:
927     return lowerSELECT(Op, DAG);
928   case ISD::VASTART:
929     return lowerVASTART(Op, DAG);
930   case ISD::FRAMEADDR:
931     return lowerFRAMEADDR(Op, DAG);
932   case ISD::RETURNADDR:
933     return lowerRETURNADDR(Op, DAG);
934   case ISD::SHL_PARTS:
935     return lowerShiftLeftParts(Op, DAG);
936   case ISD::SRA_PARTS:
937     return lowerShiftRightParts(Op, DAG, true);
938   case ISD::SRL_PARTS:
939     return lowerShiftRightParts(Op, DAG, false);
940   case ISD::BITCAST: {
941     assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
942             Subtarget.hasStdExtZfh()) &&
943            "Unexpected custom legalisation");
944     SDLoc DL(Op);
945     SDValue Op0 = Op.getOperand(0);
946     if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
947       if (Op0.getValueType() != MVT::i16)
948         return SDValue();
949       SDValue NewOp0 =
950           DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
951       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
952       return FPConv;
953     } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
954                Subtarget.hasStdExtF()) {
955       if (Op0.getValueType() != MVT::i32)
956         return SDValue();
957       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
958       SDValue FPConv =
959           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
960       return FPConv;
961     }
962     return SDValue();
963   }
964   case ISD::INTRINSIC_WO_CHAIN:
965     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
966   case ISD::INTRINSIC_W_CHAIN:
967     return LowerINTRINSIC_W_CHAIN(Op, DAG);
968   case ISD::BSWAP:
969   case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
971     assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
972     MVT VT = Op.getSimpleValueType();
973     SDLoc DL(Op);
974     // Start with the maximum immediate value which is the bitwidth - 1.
975     unsigned Imm = VT.getSizeInBits() - 1;
976     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
977     if (Op.getOpcode() == ISD::BSWAP)
978       Imm &= ~0x7U;
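    // For example, on RV32 this produces GREVI imm 31 for BITREVERSE and
    // imm 24 (31 & ~7) for BSWAP.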
979     return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
980                        DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
981   }
982   case ISD::FSHL:
983   case ISD::FSHR: {
984     MVT VT = Op.getSimpleValueType();
985     assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
986     SDLoc DL(Op);
    // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
    // use log2(XLen) bits. Mask the shift amount accordingly.
989     unsigned ShAmtWidth = Subtarget.getXLen() - 1;
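    // For example, on RV64 ShAmtWidth is 63, so the AND keeps only the low
    // 6 (log2(64)) bits of the shift amount.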
990     SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
991                                 DAG.getConstant(ShAmtWidth, DL, VT));
992     unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
993     return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
994   }
995   case ISD::TRUNCATE: {
996     SDLoc DL(Op);
997     EVT VT = Op.getValueType();
998     // Only custom-lower vector truncates
999     if (!VT.isVector())
1000       return Op;
1001 
1002     // Truncates to mask types are handled differently
1003     if (VT.getVectorElementType() == MVT::i1)
1004       return lowerVectorMaskTrunc(Op, DAG);
1005 
1006     // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
1007     // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which
1008     // truncate by one power of two at a time.
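    // For example, truncating from i64 to i8 elements emits the chain
    // i64 -> i32 -> i16 -> i8, one TRUNCATE_VECTOR node per step.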
1009     EVT DstEltVT = VT.getVectorElementType();
1010 
1011     SDValue Src = Op.getOperand(0);
1012     EVT SrcVT = Src.getValueType();
1013     EVT SrcEltVT = SrcVT.getVectorElementType();
1014 
1015     assert(DstEltVT.bitsLT(SrcEltVT) &&
1016            isPowerOf2_64(DstEltVT.getSizeInBits()) &&
1017            isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
1018            "Unexpected vector truncate lowering");
1019 
1020     SDValue Result = Src;
1021     LLVMContext &Context = *DAG.getContext();
1022     const ElementCount Count = SrcVT.getVectorElementCount();
1023     do {
1024       SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2);
1025       EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
1026       Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result);
1027     } while (SrcEltVT != DstEltVT);
1028 
1029     return Result;
1030   }
1031   case ISD::ANY_EXTEND:
1032   case ISD::ZERO_EXTEND:
1033     return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
1034   case ISD::SIGN_EXTEND:
1035     return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
1036   case ISD::SPLAT_VECTOR:
1037     return lowerSPLATVECTOR(Op, DAG);
1038   case ISD::INSERT_VECTOR_ELT:
1039     return lowerINSERT_VECTOR_ELT(Op, DAG);
1040   case ISD::EXTRACT_VECTOR_ELT:
1041     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1042   case ISD::VSCALE: {
1043     MVT VT = Op.getSimpleValueType();
1044     SDLoc DL(Op);
1045     SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
1046     // We define our scalable vector types for lmul=1 to use a 64 bit known
1047     // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
1048     // vscale as VLENB / 8.
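    // For example, with VLEN = 128 VLENB is 16, giving vscale = 16 >> 3 = 2.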
1049     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
1050                                  DAG.getConstant(3, DL, VT));
1051     return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
1052   }
1053   case ISD::FP_EXTEND: {
1054     // RVV can only do fp_extend to types double the size as the source. We
1055     // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
1056     // via f32.
1057     MVT VT = Op.getSimpleValueType();
1058     MVT SrcVT = Op.getOperand(0).getSimpleValueType();
1059     // We only need to close the gap between vXf16->vXf64.
1060     if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
1061         SrcVT.getVectorElementType() != MVT::f16)
1062       return Op;
1063     SDLoc DL(Op);
1064     MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1065     SDValue IntermediateRound =
1066         DAG.getFPExtendOrRound(Op.getOperand(0), DL, InterVT);
1067     return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
1068   }
1069   case ISD::FP_ROUND: {
1070     // RVV can only do fp_round to types half the size as the source. We
1071     // custom-lower f64->f16 rounds via RVV's round-to-odd float
1072     // conversion instruction.
1073     MVT VT = Op.getSimpleValueType();
1074     MVT SrcVT = Op.getOperand(0).getSimpleValueType();
1075     // We only need to close the gap between vXf64<->vXf16.
1076     if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
1077         SrcVT.getVectorElementType() != MVT::f64)
1078       return Op;
1079     SDLoc DL(Op);
1080     MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1081     SDValue IntermediateRound =
1082         DAG.getNode(RISCVISD::VFNCVT_ROD, DL, InterVT, Op.getOperand(0));
1083     return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
1084   }
1085   case ISD::FP_TO_SINT:
1086   case ISD::FP_TO_UINT:
1087   case ISD::SINT_TO_FP:
1088   case ISD::UINT_TO_FP: {
1089     // RVV can only do fp<->int conversions to types half/double the size as
1090     // the source. We custom-lower any conversions that do two hops into
1091     // sequences.
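    // For example, i8 -> f64 is lowered as an integer extension to i64
    // followed by an i64 -> f64 conversion, while f64 -> i8 is lowered as a
    // narrowing f64 -> i32 conversion followed by an integer truncate.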
1092     MVT VT = Op.getSimpleValueType();
1093     if (!VT.isVector())
1094       return Op;
1095     SDLoc DL(Op);
1096     SDValue Src = Op.getOperand(0);
1097     MVT EltVT = VT.getVectorElementType();
1098     MVT SrcEltVT = Src.getSimpleValueType().getVectorElementType();
1099     unsigned EltSize = EltVT.getSizeInBits();
1100     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
1101     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
1102            "Unexpected vector element types");
1103     bool IsInt2FP = SrcEltVT.isInteger();
1104     // Widening conversions
1105     if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
1106       if (IsInt2FP) {
1107         // Do a regular integer sign/zero extension then convert to float.
1108         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
1109                                       VT.getVectorElementCount());
1110         unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
1111                                  ? ISD::ZERO_EXTEND
1112                                  : ISD::SIGN_EXTEND;
1113         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
1114         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
1115       }
1116       // FP2Int
1117       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
1118       // Do one doubling fp_extend then complete the operation by converting
1119       // to int.
1120       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1121       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
1122       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
1123     }
1124 
1125     // Narrowing conversions
1126     if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
1127       if (IsInt2FP) {
1128         // One narrowing int_to_fp, then an fp_round.
1129         assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
1130         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1131         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
1132         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
1133       }
1134       // FP2Int
1135       // One narrowing fp_to_int, then truncate the integer. If the float isn't
1136       // representable by the integer, the result is poison.
1137       MVT IVecVT =
1138           MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
1139                            VT.getVectorElementCount());
1140       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
1141       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
1142     }
1143 
1144     return Op;
1145   }
1146   case ISD::VECREDUCE_ADD:
1147   case ISD::VECREDUCE_UMAX:
1148   case ISD::VECREDUCE_SMAX:
1149   case ISD::VECREDUCE_UMIN:
1150   case ISD::VECREDUCE_SMIN:
1151   case ISD::VECREDUCE_AND:
1152   case ISD::VECREDUCE_OR:
1153   case ISD::VECREDUCE_XOR:
1154     return lowerVECREDUCE(Op, DAG);
1155   case ISD::VECREDUCE_FADD:
1156   case ISD::VECREDUCE_SEQ_FADD:
1157     return lowerFPVECREDUCE(Op, DAG);
1158   case ISD::BUILD_VECTOR:
1159     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
1160   case ISD::VECTOR_SHUFFLE:
1161     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
1162   case ISD::LOAD:
1163     return lowerFixedLengthVectorLoadToRVV(Op, DAG);
1164   case ISD::STORE:
1165     return lowerFixedLengthVectorStoreToRVV(Op, DAG);
1166   case ISD::ADD:
1167     return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
1168   case ISD::SUB:
1169     return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
1170   case ISD::MUL:
1171     return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
1172   case ISD::AND:
1173     return lowerToScalableOp(Op, DAG, RISCVISD::AND_VL);
1174   case ISD::OR:
1175     return lowerToScalableOp(Op, DAG, RISCVISD::OR_VL);
1176   case ISD::XOR:
1177     return lowerToScalableOp(Op, DAG, RISCVISD::XOR_VL);
1178   case ISD::SDIV:
1179     return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
1180   case ISD::SREM:
1181     return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
1182   case ISD::UDIV:
1183     return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
1184   case ISD::UREM:
1185     return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
1186   case ISD::SHL:
1187     return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
1188   case ISD::SRA:
1189     return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
1190   case ISD::SRL:
1191     return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
1192   case ISD::FADD:
1193     return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
1194   case ISD::FSUB:
1195     return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
1196   case ISD::FMUL:
1197     return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
1198   case ISD::FDIV:
1199     return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
1200   case ISD::FNEG:
1201     return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
1202   case ISD::FMA:
1203     return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
1204   }
1205 }
1206 
1207 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
1208                              SelectionDAG &DAG, unsigned Flags) {
1209   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1210 }
1211 
1212 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
1213                              SelectionDAG &DAG, unsigned Flags) {
1214   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1215                                    Flags);
1216 }
1217 
1218 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
1219                              SelectionDAG &DAG, unsigned Flags) {
1220   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1221                                    N->getOffset(), Flags);
1222 }
1223 
1224 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
1225                              SelectionDAG &DAG, unsigned Flags) {
1226   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1227 }
1228 
1229 template <class NodeTy>
1230 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1231                                      bool IsLocal) const {
1232   SDLoc DL(N);
1233   EVT Ty = getPointerTy(DAG.getDataLayout());
1234 
1235   if (isPositionIndependent()) {
1236     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1237     if (IsLocal)
1238       // Use PC-relative addressing to access the symbol. This generates the
1239       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
1240       // %pcrel_lo(auipc)).
1241       return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
1242 
1243     // Use PC-relative addressing to access the GOT for this symbol, then load
1244     // the address from the GOT. This generates the pattern (PseudoLA sym),
1245     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
1246     return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
1247   }
1248 
1249   switch (getTargetMachine().getCodeModel()) {
1250   default:
1251     report_fatal_error("Unsupported code model for lowering");
1252   case CodeModel::Small: {
1253     // Generate a sequence for accessing addresses within the first 2 GiB of
1254     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
1255     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
1256     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
1257     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
1258     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
1259   }
1260   case CodeModel::Medium: {
1261     // Generate a sequence for accessing addresses within any 2GiB range within
1262     // the address space. This generates the pattern (PseudoLLA sym), which
1263     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
1264     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1265     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
1266   }
1267   }
1268 }
1269 
1270 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
1271                                                 SelectionDAG &DAG) const {
1272   SDLoc DL(Op);
1273   EVT Ty = Op.getValueType();
1274   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1275   int64_t Offset = N->getOffset();
1276   MVT XLenVT = Subtarget.getXLenVT();
1277 
1278   const GlobalValue *GV = N->getGlobal();
1279   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
1280   SDValue Addr = getAddr(N, DAG, IsLocal);
1281 
1282   // In order to maximise the opportunity for common subexpression elimination,
1283   // emit a separate ADD node for the global address offset instead of folding
1284   // it in the global address node. Later peephole optimisations may choose to
1285   // fold it back in when profitable.
1286   if (Offset != 0)
1287     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
1288                        DAG.getConstant(Offset, DL, XLenVT));
1289   return Addr;
1290 }
1291 
1292 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
1293                                                SelectionDAG &DAG) const {
1294   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
1295 
1296   return getAddr(N, DAG);
1297 }
1298 
1299 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
1300                                                SelectionDAG &DAG) const {
1301   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
1302 
1303   return getAddr(N, DAG);
1304 }
1305 
1306 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
1307                                             SelectionDAG &DAG) const {
1308   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
1309 
1310   return getAddr(N, DAG);
1311 }
1312 
1313 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1314                                               SelectionDAG &DAG,
1315                                               bool UseGOT) const {
1316   SDLoc DL(N);
1317   EVT Ty = getPointerTy(DAG.getDataLayout());
1318   const GlobalValue *GV = N->getGlobal();
1319   MVT XLenVT = Subtarget.getXLenVT();
1320 
1321   if (UseGOT) {
1322     // Use PC-relative addressing to access the GOT for this TLS symbol, then
1323     // load the address from the GOT and add the thread pointer. This generates
1324     // the pattern (PseudoLA_TLS_IE sym), which expands to
1325     // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
1326     SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1327     SDValue Load =
1328         SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
1329 
1330     // Add the thread pointer.
1331     SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
1332     return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
1333   }
1334 
1335   // Generate a sequence for accessing the address relative to the thread
1336   // pointer, with the appropriate adjustment for the thread pointer offset.
1337   // This generates the pattern
1338   // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
1339   SDValue AddrHi =
1340       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
1341   SDValue AddrAdd =
1342       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
1343   SDValue AddrLo =
1344       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
1345 
1346   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
1347   SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
1348   SDValue MNAdd = SDValue(
1349       DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
1350       0);
1351   return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
1352 }
1353 
1354 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1355                                                SelectionDAG &DAG) const {
1356   SDLoc DL(N);
1357   EVT Ty = getPointerTy(DAG.getDataLayout());
1358   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1359   const GlobalValue *GV = N->getGlobal();
1360 
1361   // Use a PC-relative addressing mode to access the global dynamic GOT address.
1362   // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
1363   // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
1364   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1365   SDValue Load =
1366       SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
1367 
1368   // Prepare argument list to generate call.
1369   ArgListTy Args;
1370   ArgListEntry Entry;
1371   Entry.Node = Load;
1372   Entry.Ty = CallTy;
1373   Args.push_back(Entry);
1374 
  // Set up the call to __tls_get_addr.
1376   TargetLowering::CallLoweringInfo CLI(DAG);
1377   CLI.setDebugLoc(DL)
1378       .setChain(DAG.getEntryNode())
1379       .setLibCallee(CallingConv::C, CallTy,
1380                     DAG.getExternalSymbol("__tls_get_addr", Ty),
1381                     std::move(Args));
1382 
1383   return LowerCallTo(CLI).first;
1384 }
1385 
1386 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1387                                                    SelectionDAG &DAG) const {
1388   SDLoc DL(Op);
1389   EVT Ty = Op.getValueType();
1390   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1391   int64_t Offset = N->getOffset();
1392   MVT XLenVT = Subtarget.getXLenVT();
1393 
1394   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
1395 
1396   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1397       CallingConv::GHC)
1398     report_fatal_error("In GHC calling convention TLS is not supported");
1399 
1400   SDValue Addr;
1401   switch (Model) {
1402   case TLSModel::LocalExec:
1403     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
1404     break;
1405   case TLSModel::InitialExec:
1406     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
1407     break;
1408   case TLSModel::LocalDynamic:
1409   case TLSModel::GeneralDynamic:
1410     Addr = getDynamicTLSAddr(N, DAG);
1411     break;
1412   }
1413 
1414   // In order to maximise the opportunity for common subexpression elimination,
1415   // emit a separate ADD node for the global address offset instead of folding
  // it into the global address node. Later peephole optimisations may choose to
1417   // fold it back in when profitable.
1418   if (Offset != 0)
1419     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
1420                        DAG.getConstant(Offset, DL, XLenVT));
1421   return Addr;
1422 }
1423 
1424 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
1425   SDValue CondV = Op.getOperand(0);
1426   SDValue TrueV = Op.getOperand(1);
1427   SDValue FalseV = Op.getOperand(2);
1428   SDLoc DL(Op);
1429   MVT XLenVT = Subtarget.getXLenVT();
1430 
1431   // If the result type is XLenVT and CondV is the output of a SETCC node
1432   // which also operated on XLenVT inputs, then merge the SETCC node into the
1433   // lowered RISCVISD::SELECT_CC to take advantage of the integer
1434   // compare+branch instructions. i.e.:
1435   // (select (setcc lhs, rhs, cc), truev, falsev)
1436   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
1437   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
1438       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
1439     SDValue LHS = CondV.getOperand(0);
1440     SDValue RHS = CondV.getOperand(1);
1441     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
1442     ISD::CondCode CCVal = CC->get();
1443 
1444     normaliseSetCC(LHS, RHS, CCVal);
1445 
1446     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
1447     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1448     return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
1449   }
1450 
1451   // Otherwise:
1452   // (select condv, truev, falsev)
1453   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
1454   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
1455   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
1456 
1457   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1458 
1459   return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
1460 }
1461 
1462 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1463   MachineFunction &MF = DAG.getMachineFunction();
1464   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
1465 
1466   SDLoc DL(Op);
1467   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1468                                  getPointerTy(MF.getDataLayout()));
1469 
1470   // vastart just stores the address of the VarArgsFrameIndex slot into the
1471   // memory location argument.
1472   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1473   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1474                       MachinePointerInfo(SV));
1475 }
1476 
1477 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
1478                                             SelectionDAG &DAG) const {
1479   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
1480   MachineFunction &MF = DAG.getMachineFunction();
1481   MachineFrameInfo &MFI = MF.getFrameInfo();
1482   MFI.setFrameAddressIsTaken(true);
1483   Register FrameReg = RI.getFrameRegister(MF);
1484   int XLenInBytes = Subtarget.getXLen() / 8;
1485 
1486   EVT VT = Op.getValueType();
1487   SDLoc DL(Op);
1488   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1489   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
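  // For a non-zero depth, walk up the chain of frames. This lowering assumes
  // the previous frame pointer was saved two XLEN-sized slots below the
  // current frame pointer, so each step loads from FrameAddr - 2*XLEN/8.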
1490   while (Depth--) {
1491     int Offset = -(XLenInBytes * 2);
1492     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1493                               DAG.getIntPtrConstant(Offset, DL));
1494     FrameAddr =
1495         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1496   }
1497   return FrameAddr;
1498 }
1499 
1500 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
1501                                              SelectionDAG &DAG) const {
1502   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
1503   MachineFunction &MF = DAG.getMachineFunction();
1504   MachineFrameInfo &MFI = MF.getFrameInfo();
1505   MFI.setReturnAddressIsTaken(true);
1506   MVT XLenVT = Subtarget.getXLenVT();
1507   int XLenInBytes = Subtarget.getXLen() / 8;
1508 
1509   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1510     return SDValue();
1511 
1512   EVT VT = Op.getValueType();
1513   SDLoc DL(Op);
1514   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
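  // For a non-zero depth, the return address of that frame is assumed to have
  // been spilled one XLEN-sized slot below the corresponding frame address,
  // so it is loaded from FrameAddr - XLEN/8.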
1515   if (Depth) {
1516     int Off = -XLenInBytes;
1517     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
1518     SDValue Offset = DAG.getConstant(Off, DL, VT);
1519     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1520                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1521                        MachinePointerInfo());
1522   }
1523 
1524   // Return the value of the return address register, marking it an implicit
1525   // live-in.
1526   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
1527   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
1528 }
1529 
1530 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
1531                                                  SelectionDAG &DAG) const {
1532   SDLoc DL(Op);
1533   SDValue Lo = Op.getOperand(0);
1534   SDValue Hi = Op.getOperand(1);
1535   SDValue Shamt = Op.getOperand(2);
1536   EVT VT = Lo.getValueType();
1537 
1538   // if Shamt-XLEN < 0: // Shamt < XLEN
1539   //   Lo = Lo << Shamt
1540   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  // else:
  //   Hi = Lo << (Shamt-XLEN)
  //   Lo = 0
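  //
  // The funnelled bits are computed as (Lo >>u 1) >>u (XLEN-1 - Shamt) rather
  // than Lo >>u (XLEN - Shamt) so that a shift amount of zero never produces
  // an out-of-range shift by XLEN.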
1544 
1545   SDValue Zero = DAG.getConstant(0, DL, VT);
1546   SDValue One = DAG.getConstant(1, DL, VT);
1547   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
1548   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
1549   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
1550   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
1551 
1552   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
1553   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
1554   SDValue ShiftRightLo =
1555       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
1556   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
1557   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
1558   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
1559 
1560   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
1561 
1562   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
1563   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1564 
1565   SDValue Parts[2] = {Lo, Hi};
1566   return DAG.getMergeValues(Parts, DL);
1567 }
1568 
1569 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
1570                                                   bool IsSRA) const {
1571   SDLoc DL(Op);
1572   SDValue Lo = Op.getOperand(0);
1573   SDValue Hi = Op.getOperand(1);
1574   SDValue Shamt = Op.getOperand(2);
1575   EVT VT = Lo.getValueType();
1576 
1577   // SRA expansion:
1578   //   if Shamt-XLEN < 0: // Shamt < XLEN
1579   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
1580   //     Hi = Hi >>s Shamt
1581   //   else:
1582   //     Lo = Hi >>s (Shamt-XLEN);
1583   //     Hi = Hi >>s (XLEN-1)
1584   //
1585   // SRL expansion:
1586   //   if Shamt-XLEN < 0: // Shamt < XLEN
1587   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
1588   //     Hi = Hi >>u Shamt
1589   //   else:
1590   //     Lo = Hi >>u (Shamt-XLEN);
1591   //     Hi = 0;
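  //
  // As in lowerShiftLeftParts, the funnelled bits are computed as
  // (Hi << 1) << (XLEN-1 - Shamt) rather than Hi << (XLEN - Shamt) so that a
  // shift amount of zero never produces an out-of-range shift by XLEN.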
1592 
1593   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
1594 
1595   SDValue Zero = DAG.getConstant(0, DL, VT);
1596   SDValue One = DAG.getConstant(1, DL, VT);
1597   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
1598   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
1599   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
1600   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
1601 
1602   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
1603   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
1604   SDValue ShiftLeftHi =
1605       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
1606   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
1607   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
1608   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
1609   SDValue HiFalse =
1610       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
1611 
1612   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
1613 
1614   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
1615   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1616 
1617   SDValue Parts[2] = {Lo, Hi};
1618   return DAG.getMergeValues(Parts, DL);
1619 }
1620 
1621 // Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is
1622 // illegal (currently only vXi64 RV32).
1623 // FIXME: We could also catch non-constant sign-extended i32 values and lower
1624 // them to SPLAT_VECTOR_I64
1625 SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op,
1626                                               SelectionDAG &DAG) const {
1627   SDLoc DL(Op);
1628   EVT VecVT = Op.getValueType();
1629   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
1630          "Unexpected SPLAT_VECTOR lowering");
1631   SDValue SplatVal = Op.getOperand(0);
1632 
1633   // If we can prove that the value is a sign-extended 32-bit value, lower this
1634   // as a custom node in order to try and match RVV vector/scalar instructions.
1635   if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) {
1636     if (isInt<32>(CVal->getSExtValue()))
1637       return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
1638                          DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32));
1639   }
1640 
1641   if (SplatVal.getOpcode() == ISD::SIGN_EXTEND &&
1642       SplatVal.getOperand(0).getValueType() == MVT::i32) {
1643     return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
1644                        SplatVal.getOperand(0));
1645   }
1646 
  // Otherwise, on RV32 we lower an i64-element SPLAT_VECTOR as follows, being
  // careful not to accidentally sign-extend the 32-bit halves to the e64 SEW:
1649   // vmv.v.x vX, hi
1650   // vsll.vx vX, vX, /*32*/
1651   // vmv.v.x vY, lo
1652   // vsll.vx vY, vY, /*32*/
1653   // vsrl.vx vY, vY, /*32*/
1654   // vor.vv vX, vX, vY
1655   SDValue One = DAG.getConstant(1, DL, MVT::i32);
1656   SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
1657   SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
1658   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero);
1659   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One);
1660 
1661   Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
1662   Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
1663   Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);
1664 
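  // If the high half is the constant zero, the zero-extended low half already
  // forms the full 64-bit splat and the remaining steps can be skipped.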
1665   if (isNullConstant(Hi))
1666     return Lo;
1667 
1668   Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
1669   Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);
1670 
1671   return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
1672 }
1673 
1674 // Custom-lower extensions from mask vectors by using a vselect either with 1
1675 // for zero/any-extension or -1 for sign-extension:
1676 //   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
1677 // Note that any-extension is lowered identically to zero-extension.
1678 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
1679                                                 int64_t ExtTrueVal) const {
1680   SDLoc DL(Op);
1681   EVT VecVT = Op.getValueType();
1682   SDValue Src = Op.getOperand(0);
1683   // Only custom-lower extensions from mask types
1684   if (!Src.getValueType().isVector() ||
1685       Src.getValueType().getVectorElementType() != MVT::i1)
1686     return Op;
1687 
1688   // Be careful not to introduce illegal scalar types at this stage, and be
  // careful also when splatting constants: on RV32, a vXi64 SPLAT_VECTOR is
1690   // illegal and must be expanded. Since we know that the constants are
1691   // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
1692   bool IsRV32E64 =
1693       !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
1694   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1695   SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, Subtarget.getXLenVT());
1696 
1697   if (!IsRV32E64) {
1698     SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
1699     SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
1700   } else {
1701     SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
1702     SplatTrueVal =
1703         DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
1704   }
1705 
1706   return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
1707 }
1708 
1709 // Custom-lower truncations from vectors to mask vectors by using a mask and a
1710 // setcc operation:
1711 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
1712 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
1713                                                   SelectionDAG &DAG) const {
1714   SDLoc DL(Op);
1715   EVT MaskVT = Op.getValueType();
1716   // Only expect to custom-lower truncations to mask types
1717   assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
1718          "Unexpected type for vector mask lowering");
1719   SDValue Src = Op.getOperand(0);
1720   EVT VecVT = Src.getValueType();
1721 
1722   // Be careful not to introduce illegal scalar types at this stage, and be
  // careful also when splatting constants: on RV32, a vXi64 SPLAT_VECTOR is
1724   // illegal and must be expanded. Since we know that the constants are
1725   // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
1726   bool IsRV32E64 =
1727       !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
1728   SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
1729   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1730 
1731   if (!IsRV32E64) {
1732     SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne);
1733     SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
1734   } else {
1735     SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne);
1736     SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
1737   }
1738 
1739   SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
1740 
1741   return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
1742 }
1743 
1744 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1745                                                     SelectionDAG &DAG) const {
1746   SDLoc DL(Op);
1747   EVT VecVT = Op.getValueType();
1748   SDValue Vec = Op.getOperand(0);
1749   SDValue Val = Op.getOperand(1);
1750   SDValue Idx = Op.getOperand(2);
1751 
  // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW: the vector is first
  // slid down into position, the value is inserted into the first position,
  // and the vector is slid back up. We do this to simplify patterns:
  //   (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx)
1756   if (Subtarget.is64Bit() || VecVT.getVectorElementType() != MVT::i64) {
1757     if (isNullConstant(Idx))
1758       return Op;
1759     SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
1760                                     DAG.getUNDEF(VecVT), Vec, Idx);
1761     SDValue InsertElt0 =
1762         DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val,
1763                     DAG.getConstant(0, DL, Subtarget.getXLenVT()));
1764 
1765     return DAG.getNode(RISCVISD::VSLIDEUP, DL, VecVT, Vec, InsertElt0, Idx);
1766   }
1767 
1768   // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type
1769   // is illegal (currently only vXi64 RV32).
1770   // Since there is no easy way of getting a single element into a vector when
1771   // XLEN<SEW, we lower the operation to the following sequence:
1772   //   splat      vVal, rVal
1773   //   vid.v      vVid
1774   //   vmseq.vx   mMask, vVid, rIdx
1775   //   vmerge.vvm vDest, vSrc, vVal, mMask
1776   // This essentially merges the original vector with the inserted element by
1777   // using a mask whose only set bit is that corresponding to the insert
1778   // index.
1779   SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val);
1780   SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx);
1781 
1782   SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
1783   MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
1784   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1785   SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VecVT, Mask, VL);
1786   auto SetCCVT =
1787       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT);
1788   SDValue SelectCond = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ);
1789 
1790   return DAG.getNode(ISD::VSELECT, DL, VecVT, SelectCond, SplattedVal, Vec);
1791 }
1792 
1793 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
1794 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
1795 // types this is done using VMV_X_S to allow us to glean information about the
1796 // sign bits of the result.
1797 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1798                                                      SelectionDAG &DAG) const {
1799   SDLoc DL(Op);
1800   SDValue Idx = Op.getOperand(1);
1801   SDValue Vec = Op.getOperand(0);
1802   EVT EltVT = Op.getValueType();
1803   EVT VecVT = Vec.getValueType();
1804   MVT XLenVT = Subtarget.getXLenVT();
1805 
1806   // If the index is 0, the vector is already in the right position.
1807   if (!isNullConstant(Idx)) {
1808     Vec = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, DAG.getUNDEF(VecVT), Vec,
1809                       Idx);
1810   }
1811 
1812   if (!EltVT.isInteger()) {
1813     // Floating-point extracts are handled in TableGen.
1814     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
1815                        DAG.getConstant(0, DL, XLenVT));
1816   }
1817 
1818   SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
1819   return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
1820 }
1821 
1822 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1823                                                      SelectionDAG &DAG) const {
1824   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1825   SDLoc DL(Op);
1826 
1827   if (Subtarget.hasStdExtV()) {
1828     // Some RVV intrinsics may claim that they want an integer operand to be
1829     // extended.
1830     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1831             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1832       if (II->ExtendedOperand) {
1833         assert(II->ExtendedOperand < Op.getNumOperands());
1834         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1835         SDValue &ScalarOp = Operands[II->ExtendedOperand];
1836         EVT OpVT = ScalarOp.getValueType();
1837         if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
1838             (OpVT == MVT::i32 && Subtarget.is64Bit())) {
1839           // If the operand is a constant, sign extend to increase our chances
1840           // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
1842           // FIXME: Should we ignore the upper bits in isel instead?
1843           unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
1844                                                           : ISD::ANY_EXTEND;
1845           ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
1846           return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
1847                              Operands);
1848         }
1849       }
1850     }
1851   }
1852 
1853   switch (IntNo) {
1854   default:
1855     return SDValue();    // Don't custom lower most intrinsics.
1856   case Intrinsic::thread_pointer: {
1857     EVT PtrVT = getPointerTy(DAG.getDataLayout());
1858     return DAG.getRegister(RISCV::X4, PtrVT);
1859   }
1860   case Intrinsic::riscv_vmv_x_s:
1861     assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
1862     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
1863                        Op.getOperand(1));
1864   case Intrinsic::riscv_vmv_v_x: {
1865     SDValue Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(),
1866                                  Op.getOperand(1));
1867     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
1868                        Scalar, Op.getOperand(2));
1869   }
1870   case Intrinsic::riscv_vfmv_v_f:
1871     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
1872                        Op.getOperand(1), Op.getOperand(2));
1873   }
1874 }
1875 
1876 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
1877                                                     SelectionDAG &DAG) const {
1878   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1879   SDLoc DL(Op);
1880 
1881   if (Subtarget.hasStdExtV()) {
1882     // Some RVV intrinsics may claim that they want an integer operand to be
1883     // extended.
1884     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1885             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1886       if (II->ExtendedOperand) {
1887         // The operands start from the second argument in INTRINSIC_W_CHAIN.
1888         unsigned ExtendOp = II->ExtendedOperand + 1;
1889         assert(ExtendOp < Op.getNumOperands());
1890         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1891         SDValue &ScalarOp = Operands[ExtendOp];
1892         EVT OpVT = ScalarOp.getValueType();
1893         if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
1894             (OpVT == MVT::i32 && Subtarget.is64Bit())) {
1895           // If the operand is a constant, sign extend to increase our chances
1896           // of being able to use a .vi instruction. ANY_EXTEND would become a
          // zero extend and the simm5 check in isel would fail.
1898           // FIXME: Should we ignore the upper bits in isel instead?
1899           unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
1900                                                           : ISD::ANY_EXTEND;
1901           ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
1902           return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
1903                              Operands);
1904         }
1905       }
1906     }
1907   }
1908 
1909   switch (IntNo) {
1910   default:
1911     return SDValue(); // Don't custom lower most intrinsics.
1912   case Intrinsic::riscv_vleff: {
1913     SDLoc DL(Op);
1914     SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
1915     SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0),
1916                                Op.getOperand(2), Op.getOperand(3));
1917     SDValue ReadVL =
1918         SDValue(DAG.getMachineNode(RISCV::PseudoReadVL, DL, Op->getValueType(1),
1919                                    Load.getValue(2)),
1920                 0);
1921     return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
1922   }
1923   case Intrinsic::riscv_vleff_mask: {
1924     SDLoc DL(Op);
1925     SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
1926     SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0),
1927                                Op.getOperand(2), Op.getOperand(3),
1928                                Op.getOperand(4), Op.getOperand(5));
1929     SDValue ReadVL =
1930         SDValue(DAG.getMachineNode(RISCV::PseudoReadVL, DL, Op->getValueType(1),
1931                                    Load.getValue(2)),
1932                 0);
1933     return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
1934   }
1935   }
1936 }
1937 
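// Returns the RVV reduction opcode matching the given standard ISD reduction
// opcode, along with the identity (neutral) value for that reduction at the
// given element width. The splatted identity is used as the scalar operand of
// the RVV reduction so that it does not affect the result.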
1938 static std::pair<unsigned, uint64_t>
1939 getRVVReductionOpAndIdentityVal(unsigned ISDOpcode, unsigned EltSizeBits) {
1940   switch (ISDOpcode) {
1941   default:
1942     llvm_unreachable("Unhandled reduction");
1943   case ISD::VECREDUCE_ADD:
1944     return {RISCVISD::VECREDUCE_ADD, 0};
1945   case ISD::VECREDUCE_UMAX:
1946     return {RISCVISD::VECREDUCE_UMAX, 0};
1947   case ISD::VECREDUCE_SMAX:
1948     return {RISCVISD::VECREDUCE_SMAX, minIntN(EltSizeBits)};
1949   case ISD::VECREDUCE_UMIN:
1950     return {RISCVISD::VECREDUCE_UMIN, maxUIntN(EltSizeBits)};
1951   case ISD::VECREDUCE_SMIN:
1952     return {RISCVISD::VECREDUCE_SMIN, maxIntN(EltSizeBits)};
1953   case ISD::VECREDUCE_AND:
1954     return {RISCVISD::VECREDUCE_AND, -1};
1955   case ISD::VECREDUCE_OR:
1956     return {RISCVISD::VECREDUCE_OR, 0};
1957   case ISD::VECREDUCE_XOR:
1958     return {RISCVISD::VECREDUCE_XOR, 0};
1959   }
1960 }
1961 
1962 // Take a (supported) standard ISD reduction opcode and transform it to a RISCV
// reduction opcode. The reduction is performed on an LMUL=1 vector and the
// scalar result is extracted from element 0, then sign-extended or truncated
// to the expected result type.
1965 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
1966                                             SelectionDAG &DAG) const {
1967   SDLoc DL(Op);
1968   assert(Op.getValueType().isSimple() &&
1969          Op.getOperand(0).getValueType().isSimple() &&
1970          "Unexpected vector-reduce lowering");
1971   MVT VecEltVT = Op.getOperand(0).getSimpleValueType().getVectorElementType();
1972   unsigned RVVOpcode;
1973   uint64_t IdentityVal;
1974   std::tie(RVVOpcode, IdentityVal) =
1975       getRVVReductionOpAndIdentityVal(Op.getOpcode(), VecEltVT.getSizeInBits());
1976   // We have to perform a bit of a dance to get from our vector type to the
1977   // correct LMUL=1 vector type. We divide our minimum VLEN (64) by the vector
  // element width to find the type which fills a single register. Be careful to
1979   // use the operand's vector element type rather than the reduction's value
1980   // type, as that has likely been extended to XLEN.
1981   unsigned NumElts = 64 / VecEltVT.getSizeInBits();
1982   MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts);
1983   SDValue IdentitySplat =
1984       DAG.getSplatVector(M1VT, DL, DAG.getConstant(IdentityVal, DL, VecEltVT));
1985   SDValue Reduction =
1986       DAG.getNode(RVVOpcode, DL, M1VT, Op.getOperand(0), IdentitySplat);
1987   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
1988                              DAG.getConstant(0, DL, Subtarget.getXLenVT()));
1989   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
1990 }
1991 
1992 // Given a reduction op, this function returns the matching reduction opcode,
1993 // the vector SDValue and the scalar SDValue required to lower this to a
1994 // RISCVISD node.
1995 static std::tuple<unsigned, SDValue, SDValue>
1996 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
1997   SDLoc DL(Op);
1998   switch (Op.getOpcode()) {
1999   default:
2000     llvm_unreachable("Unhandled reduction");
2001   case ISD::VECREDUCE_FADD:
2002     return std::make_tuple(RISCVISD::VECREDUCE_FADD, Op.getOperand(0),
2003                            DAG.getConstantFP(0.0, DL, EltVT));
2004   case ISD::VECREDUCE_SEQ_FADD:
2005     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD, Op.getOperand(1),
2006                            Op.getOperand(0));
2007   }
2008 }
2009 
2010 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
2011                                               SelectionDAG &DAG) const {
2012   SDLoc DL(Op);
2013   MVT VecEltVT = Op.getSimpleValueType();
2014   // We have to perform a bit of a dance to get from our vector type to the
2015   // correct LMUL=1 vector type. See above for an explanation.
2016   unsigned NumElts = 64 / VecEltVT.getSizeInBits();
2017   MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts);
2018 
2019   unsigned RVVOpcode;
2020   SDValue VectorVal, ScalarVal;
2021   std::tie(RVVOpcode, VectorVal, ScalarVal) =
2022       getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
2023 
2024   SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
2025   SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat);
2026   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
2027                      DAG.getConstant(0, DL, Subtarget.getXLenVT()));
2028 }
2029 
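// Lower a fixed-length vector load by performing an RVV VLE_VL on the
// corresponding scalable container type, with VL set to the number of elements
// of the fixed-length type, then converting the loaded value back to the
// original fixed-length vector type.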
2030 SDValue
2031 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
2032                                                      SelectionDAG &DAG) const {
2033   auto *Load = cast<LoadSDNode>(Op);
2034 
2035   SDLoc DL(Op);
2036   MVT VT = Op.getSimpleValueType();
2037   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2038 
2039   SDValue VL =
2040       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
2041 
2042   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
2043   SDValue NewLoad = DAG.getMemIntrinsicNode(
2044       RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
2045       Load->getMemoryVT(), Load->getMemOperand());
2046 
2047   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
2048   return DAG.getMergeValues({Result, Load->getChain()}, DL);
2049 }
2050 
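// Lower a fixed-length vector store by converting the stored value to the
// corresponding scalable container type and emitting an RVV VSE_VL with VL set
// to the number of elements of the fixed-length type.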
2051 SDValue
2052 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
2053                                                       SelectionDAG &DAG) const {
2054   auto *Store = cast<StoreSDNode>(Op);
2055 
2056   SDLoc DL(Op);
2057   MVT VT = Store->getValue().getSimpleValueType();
2058 
2059   // FIXME: We probably need to zero any extra bits in a byte for mask stores.
2060   // This is tricky to do.
2061 
2062   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2063 
2064   SDValue VL =
2065       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
2066 
2067   SDValue NewValue =
2068       convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget);
2069   return DAG.getMemIntrinsicNode(
2070       RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
2071       {Store->getChain(), NewValue, Store->getBasePtr(), VL},
2072       Store->getMemoryVT(), Store->getMemOperand());
2073 }
2074 
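// Lower an operation on fixed-length vectors to the equivalent RVV node
// (NewOpc) on the scalable container type: each fixed-length vector operand is
// converted to the container type, an all-ones mask and a VL equal to the
// number of fixed-length elements are appended, and the scalable result is
// converted back to the original fixed-length type.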
2075 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
2076                                                unsigned NewOpc) const {
2077   MVT VT = Op.getSimpleValueType();
2078   assert(useRVVForFixedLengthVectorVT(VT) &&
2079          "Only expected to lower fixed length vector operation!");
2080   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2081 
2082   // Create list of operands by converting existing ones to scalable types.
2083   SmallVector<SDValue, 6> Ops;
2084   for (const SDValue &V : Op->op_values()) {
2085     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
2086 
2087     // Pass through non-vector operands.
2088     if (!V.getValueType().isVector()) {
2089       Ops.push_back(V);
2090       continue;
2091     }
2092 
2093     // "cast" fixed length vector to a scalable vector.
2094     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
2095            "Only fixed length vectors are supported!");
2096     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
2097   }
2098 
2099   SDLoc DL(Op);
2100   SDValue VL =
2101       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
2102   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2103   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2104   Ops.push_back(Mask);
2105   Ops.push_back(VL);
2106 
2107   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
2108   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
2109 }
2110 
2111 // Returns the opcode of the target-specific SDNode that implements the 32-bit
2112 // form of the given Opcode.
2113 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
2114   switch (Opcode) {
2115   default:
2116     llvm_unreachable("Unexpected opcode");
2117   case ISD::SHL:
2118     return RISCVISD::SLLW;
2119   case ISD::SRA:
2120     return RISCVISD::SRAW;
2121   case ISD::SRL:
2122     return RISCVISD::SRLW;
2123   case ISD::SDIV:
2124     return RISCVISD::DIVW;
2125   case ISD::UDIV:
2126     return RISCVISD::DIVUW;
2127   case ISD::UREM:
2128     return RISCVISD::REMUW;
2129   case ISD::ROTL:
2130     return RISCVISD::ROLW;
2131   case ISD::ROTR:
2132     return RISCVISD::RORW;
2133   case RISCVISD::GREVI:
2134     return RISCVISD::GREVIW;
2135   case RISCVISD::GORCI:
2136     return RISCVISD::GORCIW;
2137   }
2138 }
2139 
2140 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
2141 // Because i32 isn't a legal type for RV64, these operations would otherwise
2142 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// later on, because the fact that the operation was originally of type i32
// is lost.
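// For example, an i32 (srl a, b) on RV64 becomes
//   (trunc i32 (RISCVISD::SRLW (any_extend i64 a), (any_extend i64 b))).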
2145 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
2146                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
2147   SDLoc DL(N);
2148   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
2149   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2150   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2151   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2152   // ReplaceNodeResults requires we maintain the same type for the return value.
2153   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2154 }
2155 
// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics in order to reduce the number of sign-extension instructions.
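// For example, an i32 (add a, b) on RV64 becomes
//   (trunc (sign_extend_inreg (add (any_extend a), (any_extend b)), i32)).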
2158 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2159   SDLoc DL(N);
2160   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2161   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2162   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2163   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2164                                DAG.getValueType(MVT::i32));
2165   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2166 }
2167 
2168 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
2169                                              SmallVectorImpl<SDValue> &Results,
2170                                              SelectionDAG &DAG) const {
2171   SDLoc DL(N);
2172   switch (N->getOpcode()) {
2173   default:
2174     llvm_unreachable("Don't know how to custom type legalize this operation!");
2175   case ISD::STRICT_FP_TO_SINT:
2176   case ISD::STRICT_FP_TO_UINT:
2177   case ISD::FP_TO_SINT:
2178   case ISD::FP_TO_UINT: {
2179     bool IsStrict = N->isStrictFPOpcode();
2180     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2181            "Unexpected custom legalisation");
2182     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
2183     // If the FP type needs to be softened, emit a library call using the 'si'
2184     // version. If we left it to default legalization we'd end up with 'di'. If
2185     // the FP type doesn't need to be softened just let generic type
2186     // legalization promote the result type.
2187     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
2188         TargetLowering::TypeSoftenFloat)
2189       return;
2190     RTLIB::Libcall LC;
2191     if (N->getOpcode() == ISD::FP_TO_SINT ||
2192         N->getOpcode() == ISD::STRICT_FP_TO_SINT)
2193       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
2194     else
2195       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
2196     MakeLibCallOptions CallOptions;
2197     EVT OpVT = Op0.getValueType();
2198     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
2199     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
2200     SDValue Result;
2201     std::tie(Result, Chain) =
2202         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
2203     Results.push_back(Result);
2204     if (IsStrict)
2205       Results.push_back(Chain);
2206     break;
2207   }
2208   case ISD::READCYCLECOUNTER: {
2209     assert(!Subtarget.is64Bit() &&
2210            "READCYCLECOUNTER only has custom type legalization on riscv32");
2211 
2212     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
2213     SDValue RCW =
2214         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
2215 
2216     Results.push_back(
2217         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
2218     Results.push_back(RCW.getValue(2));
2219     break;
2220   }
2221   case ISD::ADD:
2222   case ISD::SUB:
2223   case ISD::MUL:
2224     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2225            "Unexpected custom legalisation");
2226     if (N->getOperand(1).getOpcode() == ISD::Constant)
2227       return;
2228     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2229     break;
2230   case ISD::SHL:
2231   case ISD::SRA:
2232   case ISD::SRL:
2233     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2234            "Unexpected custom legalisation");
2235     if (N->getOperand(1).getOpcode() == ISD::Constant)
2236       return;
2237     Results.push_back(customLegalizeToWOp(N, DAG));
2238     break;
2239   case ISD::ROTL:
2240   case ISD::ROTR:
2241     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2242            "Unexpected custom legalisation");
2243     Results.push_back(customLegalizeToWOp(N, DAG));
2244     break;
2245   case ISD::SDIV:
2246   case ISD::UDIV:
2247   case ISD::UREM: {
2248     MVT VT = N->getSimpleValueType(0);
2249     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
2250            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
2251            "Unexpected custom legalisation");
2252     if (N->getOperand(0).getOpcode() == ISD::Constant ||
2253         N->getOperand(1).getOpcode() == ISD::Constant)
2254       return;
2255 
2256     // If the input is i32, use ANY_EXTEND since the W instructions don't read
2257     // the upper 32 bits. For other types we need to sign or zero extend
2258     // based on the opcode.
2259     unsigned ExtOpc = ISD::ANY_EXTEND;
2260     if (VT != MVT::i32)
2261       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
2262                                            : ISD::ZERO_EXTEND;
2263 
2264     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
2265     break;
2266   }
2267   case ISD::BITCAST: {
2268     assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2269              Subtarget.hasStdExtF()) ||
2270             (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) &&
2271            "Unexpected custom legalisation");
2272     SDValue Op0 = N->getOperand(0);
2273     if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) {
2274       if (Op0.getValueType() != MVT::f16)
2275         return;
2276       SDValue FPConv =
2277           DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0);
2278       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
2279     } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2280                Subtarget.hasStdExtF()) {
2281       if (Op0.getValueType() != MVT::f32)
2282         return;
2283       SDValue FPConv =
2284           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
2285       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
2286     }
2287     break;
2288   }
2289   case RISCVISD::GREVI:
2290   case RISCVISD::GORCI: {
2291     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2292            "Unexpected custom legalisation");
2293     // This is similar to customLegalizeToWOp, except that we pass the second
2294     // operand (a TargetConstant) straight through: it is already of type
2295     // XLenVT.
2296     SDLoc DL(N);
2297     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
2298     SDValue NewOp0 =
2299         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2300     SDValue NewRes =
2301         DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
2302     // ReplaceNodeResults requires we maintain the same type for the return
2303     // value.
2304     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
2305     break;
2306   }
2307   case ISD::BSWAP:
2308   case ISD::BITREVERSE: {
2309     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2310            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
2311     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2312                                  N->getOperand(0));
2313     unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
2314     SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
2315                                  DAG.getTargetConstant(Imm, DL,
2316                                                        Subtarget.getXLenVT()));
2317     // ReplaceNodeResults requires we maintain the same type for the return
2318     // value.
2319     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
2320     break;
2321   }
2322   case ISD::FSHL:
2323   case ISD::FSHR: {
2324     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2325            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
2326     SDValue NewOp0 =
2327         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2328     SDValue NewOp1 =
2329         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2330     SDValue NewOp2 =
2331         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
    // FSLW/FSRW take a 6-bit shift amount but i32 FSHL/FSHR only use 5 bits.
2333     // Mask the shift amount to 5 bits.
2334     NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
2335                          DAG.getConstant(0x1f, DL, MVT::i64));
2336     unsigned Opc =
2337         N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
2338     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
2339     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
2340     break;
2341   }
2342   case ISD::EXTRACT_VECTOR_ELT: {
2343     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
2344     // type is illegal (currently only vXi64 RV32).
2345     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
2346     // transferred to the destination register. We issue two of these from the
    // upper and lower halves of the SEW-bit vector element, slid down to the
2348     // first element.
2349     SDLoc DL(N);
2350     SDValue Vec = N->getOperand(0);
2351     SDValue Idx = N->getOperand(1);
2352     EVT VecVT = Vec.getValueType();
2353     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
2354            VecVT.getVectorElementType() == MVT::i64 &&
2355            "Unexpected EXTRACT_VECTOR_ELT legalization");
2356 
2357     SDValue Slidedown = Vec;
2358     // Unless the index is known to be 0, we must slide the vector down to get
2359     // the desired element into index 0.
2360     if (!isNullConstant(Idx))
2361       Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
2362                               DAG.getUNDEF(VecVT), Vec, Idx);
2363 
2364     MVT XLenVT = Subtarget.getXLenVT();
2365     // Extract the lower XLEN bits of the correct vector element.
2366     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx);
2367 
2368     // To extract the upper XLEN bits of the vector element, shift the first
2369     // element right by 32 bits and re-extract the lower XLEN bits.
2370     SDValue ThirtyTwoV =
2371         DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
2372                     DAG.getConstant(32, DL, Subtarget.getXLenVT()));
2373     SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV);
2374 
2375     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx);
2376 
2377     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
2378     break;
2379   }
2380   case ISD::INTRINSIC_WO_CHAIN: {
2381     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2382     switch (IntNo) {
2383     default:
2384       llvm_unreachable(
2385           "Don't know how to custom type legalize this intrinsic!");
2386     case Intrinsic::riscv_vmv_x_s: {
2387       EVT VT = N->getValueType(0);
2388       assert((VT == MVT::i8 || VT == MVT::i16 ||
2389               (Subtarget.is64Bit() && VT == MVT::i32)) &&
2390              "Unexpected custom legalisation!");
2391       SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
2392                                     Subtarget.getXLenVT(), N->getOperand(1));
2393       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
2394       break;
2395     }
2396     }
2397     break;
2398   }
2399   case ISD::VECREDUCE_ADD:
2400   case ISD::VECREDUCE_AND:
2401   case ISD::VECREDUCE_OR:
2402   case ISD::VECREDUCE_XOR:
2403   case ISD::VECREDUCE_SMAX:
2404   case ISD::VECREDUCE_UMAX:
2405   case ISD::VECREDUCE_SMIN:
2406   case ISD::VECREDUCE_UMIN:
    // The custom-lowering for these nodes performs the reduction on an LMUL=1
    // vector and already extracts the scalar result from element 0, returning
    // it with the original (illegal) result type; the remaining legalization
    // of that scalar extract is left to the generic legalizer.
2410     Results.push_back(lowerVECREDUCE(SDValue(N, 0), DAG));
2411     break;
2412   }
2413 }
2414 
2415 // A structure to hold one of the bit-manipulation patterns below. Together, a
2416 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
2417 //   (or (and (shl x, 1), 0xAAAAAAAA),
2418 //       (and (srl x, 1), 0x55555555))
2419 struct RISCVBitmanipPat {
2420   SDValue Op;
2421   unsigned ShAmt;
2422   bool IsSHL;
2423 
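  // Returns true when the two patterns shift the same source by the same
  // amount but in opposite directions (one SHL, one SRL), i.e. when they can
  // be combined into a single GREVI/GORCI stage.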
2424   bool formsPairWith(const RISCVBitmanipPat &Other) const {
2425     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
2426   }
2427 };
2428 
2429 // Matches any of the following bit-manipulation patterns:
2430 //   (and (shl x, 1), (0x55555555 << 1))
2431 //   (and (srl x, 1), 0x55555555)
2432 //   (shl (and x, 0x55555555), 1)
2433 //   (srl (and x, (0x55555555 << 1)), 1)
2434 // where the shift amount and mask may vary thus:
2435 //   [1]  = 0x55555555 / 0xAAAAAAAA
2436 //   [2]  = 0x33333333 / 0xCCCCCCCC
2437 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
2438 //   [8]  = 0x00FF00FF / 0xFF00FF00
//   [16] = 0x0000FFFF / 0xFFFF0000
2440 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
2441 static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
2442   Optional<uint64_t> Mask;
2443   // Optionally consume a mask around the shift operation.
2444   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
2445     Mask = Op.getConstantOperandVal(1);
2446     Op = Op.getOperand(0);
2447   }
2448   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
2449     return None;
2450   bool IsSHL = Op.getOpcode() == ISD::SHL;
2451 
2452   if (!isa<ConstantSDNode>(Op.getOperand(1)))
2453     return None;
2454   auto ShAmt = Op.getConstantOperandVal(1);
2455 
2456   if (!isPowerOf2_64(ShAmt))
2457     return None;
2458 
2459   // These are the unshifted masks which we use to match bit-manipulation
2460   // patterns. They may be shifted left in certain circumstances.
2461   static const uint64_t BitmanipMasks[] = {
2462       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
2463       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
2464   };
2465 
2466   unsigned MaskIdx = Log2_64(ShAmt);
2467   if (MaskIdx >= array_lengthof(BitmanipMasks))
2468     return None;
2469 
2470   auto Src = Op.getOperand(0);
2471 
2472   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
2473   auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
2474 
2475   // The expected mask is shifted left when the AND is found around SHL
2476   // patterns.
2477   //   ((x >> 1) & 0x55555555)
2478   //   ((x << 1) & 0xAAAAAAAA)
2479   bool SHLExpMask = IsSHL;
2480 
2481   if (!Mask) {
2482     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
2483     // the mask is all ones: consume that now.
2484     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
2485       Mask = Src.getConstantOperandVal(1);
2486       Src = Src.getOperand(0);
2487       // The expected mask is now in fact shifted left for SRL, so reverse the
2488       // decision.
2489       //   ((x & 0xAAAAAAAA) >> 1)
2490       //   ((x & 0x55555555) << 1)
2491       SHLExpMask = !SHLExpMask;
2492     } else {
2493       // Use a default shifted mask of all-ones if there's no AND, truncated
2494       // down to the expected width. This simplifies the logic later on.
2495       Mask = maskTrailingOnes<uint64_t>(Width);
2496       *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
2497     }
2498   }
2499 
2500   if (SHLExpMask)
2501     ExpMask <<= ShAmt;
2502 
2503   if (Mask != ExpMask)
2504     return None;
2505 
2506   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
2507 }
2508 
2509 // Match the following pattern as a GREVI(W) operation
2510 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
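// e.g. (or (and (shl x, 1), 0xAAAAAAAA),
//          (and (srl x, 1), 0x55555555)) -> (GREVI x, 1)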
2511 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
2512                                const RISCVSubtarget &Subtarget) {
2513   EVT VT = Op.getValueType();
2514 
2515   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
2516     auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
2517     auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
2518     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
2519       SDLoc DL(Op);
2520       return DAG.getNode(
2521           RISCVISD::GREVI, DL, VT, LHS->Op,
2522           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
2523     }
2524   }
2525   return SDValue();
2526 }
2527 
// Matches any of the following patterns as a GORCI(W) operation
2529 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
2530 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
2531 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
2532 // Note that with the variant of 3.,
2533 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
2534 // the inner pattern will first be matched as GREVI and then the outer
2535 // pattern will be matched to GORC via the first rule above.
2536 // 4.  (or (rotl/rotr x, bitwidth/2), x)
2537 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
2538                                const RISCVSubtarget &Subtarget) {
2539   EVT VT = Op.getValueType();
2540 
2541   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
2542     SDLoc DL(Op);
2543     SDValue Op0 = Op.getOperand(0);
2544     SDValue Op1 = Op.getOperand(1);
2545 
2546     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
2547       if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
2548           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
2549         return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
2550       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
2551       if ((Reverse.getOpcode() == ISD::ROTL ||
2552            Reverse.getOpcode() == ISD::ROTR) &&
2553           Reverse.getOperand(0) == X &&
2554           isa<ConstantSDNode>(Reverse.getOperand(1))) {
2555         uint64_t RotAmt = Reverse.getConstantOperandVal(1);
2556         if (RotAmt == (VT.getSizeInBits() / 2))
2557           return DAG.getNode(
2558               RISCVISD::GORCI, DL, VT, X,
2559               DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
2560       }
2561       return SDValue();
2562     };
2563 
2564     // Check for either commutable permutation of (or (GREVI x, shamt), x)
2565     if (SDValue V = MatchOROfReverse(Op0, Op1))
2566       return V;
2567     if (SDValue V = MatchOROfReverse(Op1, Op0))
2568       return V;
2569 
2570     // OR is commutable so canonicalize its OR operand to the left
2571     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
2572       std::swap(Op0, Op1);
2573     if (Op0.getOpcode() != ISD::OR)
2574       return SDValue();
2575     SDValue OrOp0 = Op0.getOperand(0);
2576     SDValue OrOp1 = Op0.getOperand(1);
2577     auto LHS = matchRISCVBitmanipPat(OrOp0);
2578     // OR is commutable so swap the operands and try again: x might have been
2579     // on the left
2580     if (!LHS) {
2581       std::swap(OrOp0, OrOp1);
2582       LHS = matchRISCVBitmanipPat(OrOp0);
2583     }
2584     auto RHS = matchRISCVBitmanipPat(Op1);
2585     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
2586       return DAG.getNode(
2587           RISCVISD::GORCI, DL, VT, LHS->Op,
2588           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
2589     }
2590   }
2591   return SDValue();
2592 }
2593 
2594 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
2595 // non-zero, and to x when C1^C2 is zero. Any repeated GREVI stage undoes
2596 // itself. Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). A repeated
2597 // stage does not undo itself, but it is redundant.
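// For example, (GREVI (GREVI x, 1), 2) becomes (GREVI x, 3), while
// (GREVI (GREVI x, 3), 3) folds away to x; for GORCI the two shift amounts
// are simply OR'd together.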
2598 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
2599   unsigned ShAmt1 = N->getConstantOperandVal(1);
2600   SDValue Src = N->getOperand(0);
2601 
2602   if (Src.getOpcode() != N->getOpcode())
2603     return SDValue();
2604 
2605   unsigned ShAmt2 = Src.getConstantOperandVal(1);
2606   Src = Src.getOperand(0);
2607 
2608   unsigned CombinedShAmt;
2609   if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW)
2610     CombinedShAmt = ShAmt1 | ShAmt2;
2611   else
2612     CombinedShAmt = ShAmt1 ^ ShAmt2;
2613 
2614   if (CombinedShAmt == 0)
2615     return Src;
2616 
2617   SDLoc DL(N);
2618   return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src,
2619                      DAG.getTargetConstant(CombinedShAmt, DL,
2620                                            N->getOperand(1).getValueType()));
2621 }
2622 
2623 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
2624                                                DAGCombinerInfo &DCI) const {
2625   SelectionDAG &DAG = DCI.DAG;
2626 
2627   switch (N->getOpcode()) {
2628   default:
2629     break;
2630   case RISCVISD::SplitF64: {
2631     SDValue Op0 = N->getOperand(0);
2632     // If the input to SplitF64 is just BuildPairF64 then the operation is
2633     // redundant. Instead, use BuildPairF64's operands directly.
2634     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
2635       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
2636 
2637     SDLoc DL(N);
2638 
2639     // It's cheaper to materialise two 32-bit integers than to load a double
2640     // from the constant pool and transfer it to integer registers through the
2641     // stack.
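    // For example (illustrative), the f64 constant 1.0 (0x3FF0000000000000)
    // splits into Lo = 0x00000000 and Hi = 0x3FF00000, both cheap to build in
    // integer registers.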
2642     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
2643       APInt V = C->getValueAPF().bitcastToAPInt();
2644       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
2645       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
2646       return DCI.CombineTo(N, Lo, Hi);
2647     }
2648 
2649     // This is a target-specific version of a DAGCombine performed in
2650     // DAGCombiner::visitBITCAST. It performs the equivalent of:
2651     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
2652     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
2653     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
2654         !Op0.getNode()->hasOneUse())
2655       break;
2656     SDValue NewSplitF64 =
2657         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
2658                     Op0.getOperand(0));
2659     SDValue Lo = NewSplitF64.getValue(0);
2660     SDValue Hi = NewSplitF64.getValue(1);
2661     APInt SignBit = APInt::getSignMask(32);
2662     if (Op0.getOpcode() == ISD::FNEG) {
2663       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
2664                                   DAG.getConstant(SignBit, DL, MVT::i32));
2665       return DCI.CombineTo(N, Lo, NewHi);
2666     }
2667     assert(Op0.getOpcode() == ISD::FABS);
2668     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
2669                                 DAG.getConstant(~SignBit, DL, MVT::i32));
2670     return DCI.CombineTo(N, Lo, NewHi);
2671   }
2672   case RISCVISD::SLLW:
2673   case RISCVISD::SRAW:
2674   case RISCVISD::SRLW:
2675   case RISCVISD::ROLW:
2676   case RISCVISD::RORW: {
2677     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
2678     SDValue LHS = N->getOperand(0);
2679     SDValue RHS = N->getOperand(1);
2680     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
2681     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
2682     if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
2683         SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
2684       if (N->getOpcode() != ISD::DELETED_NODE)
2685         DCI.AddToWorklist(N);
2686       return SDValue(N, 0);
2687     }
2688     break;
2689   }
2690   case RISCVISD::FSL:
2691   case RISCVISD::FSR: {
2692     // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
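    // For example, on RV32 only the low 6 bits (a mask of 63) of the shift
    // amount are demanded; higher bits can be simplified away.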
2693     SDValue ShAmt = N->getOperand(2);
2694     unsigned BitWidth = ShAmt.getValueSizeInBits();
2695     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
2696     APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
2697     if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
2698       if (N->getOpcode() != ISD::DELETED_NODE)
2699         DCI.AddToWorklist(N);
2700       return SDValue(N, 0);
2701     }
2702     break;
2703   }
2704   case RISCVISD::FSLW:
2705   case RISCVISD::FSRW: {
2706     // Only the lower 32 bits of Values and lower 6 bits of shift amount are
2707     // read.
2708     SDValue Op0 = N->getOperand(0);
2709     SDValue Op1 = N->getOperand(1);
2710     SDValue ShAmt = N->getOperand(2);
2711     APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
2712     APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
2713     if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
2714         SimplifyDemandedBits(Op1, OpMask, DCI) ||
2715         SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
2716       if (N->getOpcode() != ISD::DELETED_NODE)
2717         DCI.AddToWorklist(N);
2718       return SDValue(N, 0);
2719     }
2720     break;
2721   }
2722   case RISCVISD::GREVIW:
2723   case RISCVISD::GORCIW: {
2724     // Only the lower 32 bits of the first operand are read
2725     SDValue Op0 = N->getOperand(0);
2726     APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
2727     if (SimplifyDemandedBits(Op0, Mask, DCI)) {
2728       if (N->getOpcode() != ISD::DELETED_NODE)
2729         DCI.AddToWorklist(N);
2730       return SDValue(N, 0);
2731     }
2732 
2733     return combineGREVI_GORCI(N, DCI.DAG);
2734   }
2735   case RISCVISD::FMV_X_ANYEXTW_RV64: {
2736     SDLoc DL(N);
2737     SDValue Op0 = N->getOperand(0);
2738     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
2739     // conversion is unnecessary and can be replaced with an ANY_EXTEND
2740     // of the FMV_W_X_RV64 operand.
2741     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
2742       assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
2743              "Unexpected value type!");
2744       return Op0.getOperand(0);
2745     }
2746 
2747     // This is a target-specific version of a DAGCombine performed in
2748     // DAGCombiner::visitBITCAST. It performs the equivalent of:
2749     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
2750     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
2751     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
2752         !Op0.getNode()->hasOneUse())
2753       break;
2754     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
2755                                  Op0.getOperand(0));
2756     APInt SignBit = APInt::getSignMask(32).sext(64);
2757     if (Op0.getOpcode() == ISD::FNEG)
2758       return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
2759                          DAG.getConstant(SignBit, DL, MVT::i64));
2760 
2761     assert(Op0.getOpcode() == ISD::FABS);
2762     return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
2763                        DAG.getConstant(~SignBit, DL, MVT::i64));
2764   }
2765   case RISCVISD::GREVI:
2766   case RISCVISD::GORCI:
2767     return combineGREVI_GORCI(N, DCI.DAG);
2768   case ISD::OR:
2769     if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
2770       return GREV;
2771     if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
2772       return GORC;
2773     break;
2774   case RISCVISD::SELECT_CC: {
2775     // Transform
2776     // (select_cc (xor X, 1), 0, setne, trueV, falseV) ->
2777     // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
2778     // This can occur when legalizing some floating point comparisons.
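    // For example (illustrative), if X is the 0/1 result of a SETCC, then
    //   (select_cc (xor X, 1), 0, setne, trueV, falseV)
    // becomes (select_cc X, 0, seteq, trueV, falseV).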
2779     SDValue LHS = N->getOperand(0);
2780     SDValue RHS = N->getOperand(1);
2781     auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
2782     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
2783     if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) &&
2784         LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) &&
2785         DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) {
2786       SDLoc DL(N);
2787       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
2788       SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
2789       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
2790                          {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3),
2791                           N->getOperand(4)});
2792     }
2793     break;
2794   }
2795   case ISD::SETCC: {
2796     // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1.
2797     // Comparing with 0 may allow us to fold into bnez/beqz.
2798     SDValue LHS = N->getOperand(0);
2799     SDValue RHS = N->getOperand(1);
2800     if (LHS.getValueType().isScalableVector())
2801       break;
2802     auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2803     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
2804     if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) &&
2805         DAG.MaskedValueIsZero(LHS, Mask)) {
2806       SDLoc DL(N);
2807       SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType());
2808       CC = ISD::getSetCCInverse(CC, LHS.getValueType());
2809       return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC);
2810     }
2811     break;
2812   }
2813   }
2814 
2815   return SDValue();
2816 }
2817 
2818 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
2819     const SDNode *N, CombineLevel Level) const {
2820   // The following folds are only desirable if `(OP _, c1 << c2)` can be
2821   // materialised in fewer instructions than `(OP _, c1)`:
2822   //
2823   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2824   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
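  // For example, (shl (add x, 1), 3) is folded to (add (shl x, 3), 8) since
  // both 1 and 8 fit in an ADDI immediate, whereas (shl (add x, 2047), 12) is
  // left alone because 2047 fits but 2047 << 12 does not.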
2825   SDValue N0 = N->getOperand(0);
2826   EVT Ty = N0.getValueType();
2827   if (Ty.isScalarInteger() &&
2828       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
2829     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2830     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
2831     if (C1 && C2) {
2832       const APInt &C1Int = C1->getAPIntValue();
2833       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
2834 
2835       // We can materialise `c1 << c2` into an add immediate, so it's "free",
2836       // and the combine should happen, to potentially allow further combines
2837       // later.
2838       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
2839           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
2840         return true;
2841 
2842       // We can materialise `c1` in an add immediate, so it's "free", and the
2843       // combine should be prevented.
2844       if (C1Int.getMinSignedBits() <= 64 &&
2845           isLegalAddImmediate(C1Int.getSExtValue()))
2846         return false;
2847 
2848       // Neither constant will fit into an immediate, so find materialisation
2849       // costs.
2850       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
2851                                               Subtarget.is64Bit());
2852       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
2853           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
2854 
2855       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
2856       // combine should be prevented.
2857       if (C1Cost < ShiftedC1Cost)
2858         return false;
2859     }
2860   }
2861   return true;
2862 }
2863 
2864 bool RISCVTargetLowering::targetShrinkDemandedConstant(
2865     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2866     TargetLoweringOpt &TLO) const {
2867   // Delay this optimization as late as possible.
2868   if (!TLO.LegalOps)
2869     return false;
2870 
2871   EVT VT = Op.getValueType();
2872   if (VT.isVector())
2873     return false;
2874 
2875   // Only handle AND for now.
2876   if (Op.getOpcode() != ISD::AND)
2877     return false;
2878 
2879   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2880   if (!C)
2881     return false;
2882 
2883   const APInt &Mask = C->getAPIntValue();
2884 
2885   // Clear all non-demanded bits initially.
2886   APInt ShrunkMask = Mask & DemandedBits;
2887 
2888   // If the shrunk mask fits in sign extended 12 bits, let the target
2889   // independent code apply it.
2890   if (ShrunkMask.isSignedIntN(12))
2891     return false;
2892 
2893   // Try to make a smaller immediate by setting undemanded bits.
2894 
2895   // We need to be able to make a negative number through a combination of mask
2896   // and undemanded bits.
2897   APInt ExpandedMask = Mask | ~DemandedBits;
2898   if (!ExpandedMask.isNegative())
2899     return false;
2900 
2901   // Find the minimum number of bits needed to represent the negative number.
2902   unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
2903 
2904   // Try to make a 12 bit negative immediate. If that fails try to make a 32
2905   // bit negative immediate unless the shrunk immediate already fits in 32 bits.
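  // For illustration (a hypothetical i32 example): if only bits 8-15 are
  // demanded, a mask of 0xFF00 can be widened to 0xFFFFFF00 (-256), which
  // fits in a 12-bit ANDI immediate.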
2906   APInt NewMask = ShrunkMask;
2907   if (MinSignedBits <= 12)
2908     NewMask.setBitsFrom(11);
2909   else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
2910     NewMask.setBitsFrom(31);
2911   else
2912     return false;
2913 
2914   // Sanity check that our new mask is a subset of the expanded mask.
2915   assert(NewMask.isSubsetOf(ExpandedMask));
2916 
2917   // If we aren't changing the mask, just return true to keep it and prevent
2918   // the caller from optimizing.
2919   if (NewMask == Mask)
2920     return true;
2921 
2922   // Replace the constant with the new mask.
2923   SDLoc DL(Op);
2924   SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
2925   SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
2926   return TLO.CombineTo(Op, NewOp);
2927 }
2928 
2929 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2930                                                         KnownBits &Known,
2931                                                         const APInt &DemandedElts,
2932                                                         const SelectionDAG &DAG,
2933                                                         unsigned Depth) const {
2934   unsigned BitWidth = Known.getBitWidth();
2935   unsigned Opc = Op.getOpcode();
2936   assert((Opc >= ISD::BUILTIN_OP_END ||
2937           Opc == ISD::INTRINSIC_WO_CHAIN ||
2938           Opc == ISD::INTRINSIC_W_CHAIN ||
2939           Opc == ISD::INTRINSIC_VOID) &&
2940          "Should use MaskedValueIsZero if you don't know whether Op"
2941          " is a target node!");
2942 
2943   Known.resetAll();
2944   switch (Opc) {
2945   default: break;
2946   case RISCVISD::REMUW: {
2947     KnownBits Known2;
2948     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
2949     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
2950     // We only care about the lower 32 bits.
2951     Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
2952     // Restore the original width by sign extending.
2953     Known = Known.sext(BitWidth);
2954     break;
2955   }
2956   case RISCVISD::DIVUW: {
2957     KnownBits Known2;
2958     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
2959     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
2960     // We only care about the lower 32 bits.
2961     Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
2962     // Restore the original width by sign extending.
2963     Known = Known.sext(BitWidth);
2964     break;
2965   }
2966   case RISCVISD::READ_VLENB:
2967     // We assume VLENB is at least 8 bytes.
2968     // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
2969     Known.Zero.setLowBits(3);
2970     break;
2971   }
2972 }
2973 
2974 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
2975     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
2976     unsigned Depth) const {
2977   switch (Op.getOpcode()) {
2978   default:
2979     break;
2980   case RISCVISD::SLLW:
2981   case RISCVISD::SRAW:
2982   case RISCVISD::SRLW:
2983   case RISCVISD::DIVW:
2984   case RISCVISD::DIVUW:
2985   case RISCVISD::REMUW:
2986   case RISCVISD::ROLW:
2987   case RISCVISD::RORW:
2988   case RISCVISD::GREVIW:
2989   case RISCVISD::GORCIW:
2990   case RISCVISD::FSLW:
2991   case RISCVISD::FSRW:
2992     // TODO: As the result is sign-extended, this is conservatively correct. A
2993     // more precise answer could be calculated for SRAW depending on known
2994     // bits in the shift amount.
2995     return 33;
2996   case RISCVISD::VMV_X_S:
2997     // The number of sign bits of the scalar result is computed by obtaining the
2998     // element type of the input vector operand, subtracting its width from the
2999     // XLEN, and then adding one (sign bit within the element type). If the
3000     // element type is wider than XLen, the least-significant XLEN bits are
3001     // taken.
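    // For example, reading an i8 element on RV64 gives 64 - 8 + 1 = 57 sign
    // bits in the scalar result.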
3002     if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
3003       return 1;
3004     return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
3005   }
3006 
3007   return 1;
3008 }
3009 
3010 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
3011                                                   MachineBasicBlock *BB) {
3012   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
3013 
3014   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
3015   // Should the count have wrapped while it was being read, we need to try
3016   // again.
3017   // ...
3018   // read:
3019   // rdcycleh x3 # load high word of cycle
3020   // rdcycle  x2 # load low word of cycle
3021   // rdcycleh x4 # load high word of cycle
3022   // bne x3, x4, read # check if high word reads match, otherwise try again
3023   // ...
3024 
3025   MachineFunction &MF = *BB->getParent();
3026   const BasicBlock *LLVM_BB = BB->getBasicBlock();
3027   MachineFunction::iterator It = ++BB->getIterator();
3028 
3029   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
3030   MF.insert(It, LoopMBB);
3031 
3032   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
3033   MF.insert(It, DoneMBB);
3034 
3035   // Transfer the remainder of BB and its successor edges to DoneMBB.
3036   DoneMBB->splice(DoneMBB->begin(), BB,
3037                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
3038   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
3039 
3040   BB->addSuccessor(LoopMBB);
3041 
3042   MachineRegisterInfo &RegInfo = MF.getRegInfo();
3043   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3044   Register LoReg = MI.getOperand(0).getReg();
3045   Register HiReg = MI.getOperand(1).getReg();
3046   DebugLoc DL = MI.getDebugLoc();
3047 
3048   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
3049   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
3050       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
3051       .addReg(RISCV::X0);
3052   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
3053       .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
3054       .addReg(RISCV::X0);
3055   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
3056       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
3057       .addReg(RISCV::X0);
3058 
3059   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
3060       .addReg(HiReg)
3061       .addReg(ReadAgainReg)
3062       .addMBB(LoopMBB);
3063 
3064   LoopMBB->addSuccessor(LoopMBB);
3065   LoopMBB->addSuccessor(DoneMBB);
3066 
3067   MI.eraseFromParent();
3068 
3069   return DoneMBB;
3070 }
3071 
3072 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
3073                                              MachineBasicBlock *BB) {
3074   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
3075 
3076   MachineFunction &MF = *BB->getParent();
3077   DebugLoc DL = MI.getDebugLoc();
3078   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
3079   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
3080   Register LoReg = MI.getOperand(0).getReg();
3081   Register HiReg = MI.getOperand(1).getReg();
3082   Register SrcReg = MI.getOperand(2).getReg();
3083   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
3084   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
3085 
3086   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
3087                           RI);
3088   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
3089   MachineMemOperand *MMOLo =
3090       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
3091   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
3092       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
3093   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
3094       .addFrameIndex(FI)
3095       .addImm(0)
3096       .addMemOperand(MMOLo);
3097   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
3098       .addFrameIndex(FI)
3099       .addImm(4)
3100       .addMemOperand(MMOHi);
3101   MI.eraseFromParent(); // The pseudo instruction is gone now.
3102   return BB;
3103 }
3104 
3105 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
3106                                                  MachineBasicBlock *BB) {
3107   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
3108          "Unexpected instruction");
3109 
3110   MachineFunction &MF = *BB->getParent();
3111   DebugLoc DL = MI.getDebugLoc();
3112   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
3113   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
3114   Register DstReg = MI.getOperand(0).getReg();
3115   Register LoReg = MI.getOperand(1).getReg();
3116   Register HiReg = MI.getOperand(2).getReg();
3117   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
3118   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
3119 
3120   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
3121   MachineMemOperand *MMOLo =
3122       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
3123   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
3124       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
3125   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
3126       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
3127       .addFrameIndex(FI)
3128       .addImm(0)
3129       .addMemOperand(MMOLo);
3130   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
3131       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
3132       .addFrameIndex(FI)
3133       .addImm(4)
3134       .addMemOperand(MMOHi);
3135   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
3136   MI.eraseFromParent(); // The pseudo instruction is gone now.
3137   return BB;
3138 }
3139 
3140 static bool isSelectPseudo(MachineInstr &MI) {
3141   switch (MI.getOpcode()) {
3142   default:
3143     return false;
3144   case RISCV::Select_GPR_Using_CC_GPR:
3145   case RISCV::Select_FPR16_Using_CC_GPR:
3146   case RISCV::Select_FPR32_Using_CC_GPR:
3147   case RISCV::Select_FPR64_Using_CC_GPR:
3148     return true;
3149   }
3150 }
3151 
3152 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
3153                                            MachineBasicBlock *BB) {
3154   // To "insert" Select_* instructions, we actually have to insert the triangle
3155   // control-flow pattern.  The incoming instructions know the destination vreg
3156   // to set, the condition code register to branch on, the true/false values to
3157   // select between, and the condcode to use to select the appropriate branch.
3158   //
3159   // We produce the following control flow:
3160   //     HeadMBB
3161   //     |  \
3162   //     |  IfFalseMBB
3163   //     | /
3164   //    TailMBB
3165   //
3166   // When we find a sequence of selects we attempt to optimize their emission
3167   // by sharing the control flow. Currently we only handle cases where we have
3168   // multiple selects with the exact same condition (same LHS, RHS and CC).
3169   // The selects may be interleaved with other instructions if the other
3170   // instructions meet some requirements we deem safe:
3171   // - They are debug instructions. Otherwise,
3172   // - They do not have side-effects, do not access memory and their inputs do
3173   //   not depend on the results of the select pseudo-instructions.
3174   // The TrueV/FalseV operands of the selects cannot depend on the result of
3175   // previous selects in the sequence.
3176   // These conditions could be further relaxed. See the X86 target for a
3177   // related approach and more information.
3178   Register LHS = MI.getOperand(1).getReg();
3179   Register RHS = MI.getOperand(2).getReg();
3180   auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
3181 
3182   SmallVector<MachineInstr *, 4> SelectDebugValues;
3183   SmallSet<Register, 4> SelectDests;
3184   SelectDests.insert(MI.getOperand(0).getReg());
3185 
3186   MachineInstr *LastSelectPseudo = &MI;
3187 
3188   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
3189        SequenceMBBI != E; ++SequenceMBBI) {
3190     if (SequenceMBBI->isDebugInstr())
3191       continue;
3192     else if (isSelectPseudo(*SequenceMBBI)) {
3193       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
3194           SequenceMBBI->getOperand(2).getReg() != RHS ||
3195           SequenceMBBI->getOperand(3).getImm() != CC ||
3196           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
3197           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
3198         break;
3199       LastSelectPseudo = &*SequenceMBBI;
3200       SequenceMBBI->collectDebugValues(SelectDebugValues);
3201       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
3202     } else {
3203       if (SequenceMBBI->hasUnmodeledSideEffects() ||
3204           SequenceMBBI->mayLoadOrStore())
3205         break;
3206       if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
3207             return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
3208           }))
3209         break;
3210     }
3211   }
3212 
3213   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
3214   const BasicBlock *LLVM_BB = BB->getBasicBlock();
3215   DebugLoc DL = MI.getDebugLoc();
3216   MachineFunction::iterator I = ++BB->getIterator();
3217 
3218   MachineBasicBlock *HeadMBB = BB;
3219   MachineFunction *F = BB->getParent();
3220   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
3221   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
3222 
3223   F->insert(I, IfFalseMBB);
3224   F->insert(I, TailMBB);
3225 
3226   // Transfer debug instructions associated with the selects to TailMBB.
3227   for (MachineInstr *DebugInstr : SelectDebugValues) {
3228     TailMBB->push_back(DebugInstr->removeFromParent());
3229   }
3230 
3231   // Move all instructions after the sequence to TailMBB.
3232   TailMBB->splice(TailMBB->end(), HeadMBB,
3233                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
3234   // Update machine-CFG edges by transferring all successors of the current
3235   // block to the new block which will contain the Phi nodes for the selects.
3236   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
3237   // Set the successors for HeadMBB.
3238   HeadMBB->addSuccessor(IfFalseMBB);
3239   HeadMBB->addSuccessor(TailMBB);
3240 
3241   // Insert appropriate branch.
3242   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
3243 
3244   BuildMI(HeadMBB, DL, TII.get(Opcode))
3245     .addReg(LHS)
3246     .addReg(RHS)
3247     .addMBB(TailMBB);
3248 
3249   // IfFalseMBB just falls through to TailMBB.
3250   IfFalseMBB->addSuccessor(TailMBB);
3251 
3252   // Create PHIs for all of the select pseudo-instructions.
3253   auto SelectMBBI = MI.getIterator();
3254   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
3255   auto InsertionPoint = TailMBB->begin();
3256   while (SelectMBBI != SelectEnd) {
3257     auto Next = std::next(SelectMBBI);
3258     if (isSelectPseudo(*SelectMBBI)) {
3259       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
3260       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
3261               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
3262           .addReg(SelectMBBI->getOperand(4).getReg())
3263           .addMBB(HeadMBB)
3264           .addReg(SelectMBBI->getOperand(5).getReg())
3265           .addMBB(IfFalseMBB);
3266       SelectMBBI->eraseFromParent();
3267     }
3268     SelectMBBI = Next;
3269   }
3270 
3271   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
3272   return TailMBB;
3273 }
3274 
3275 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
3276                                     int VLIndex, unsigned SEWIndex,
3277                                     RISCVVLMUL VLMul, bool ForceTailAgnostic) {
3278   MachineFunction &MF = *BB->getParent();
3279   DebugLoc DL = MI.getDebugLoc();
3280   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
3281 
3282   unsigned SEW = MI.getOperand(SEWIndex).getImm();
3283   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
3284   RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));
3285 
3286   MachineRegisterInfo &MRI = MF.getRegInfo();
3287 
3288   // VL and VTYPE are alive here.
3289   MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI));
3290 
3291   if (VLIndex >= 0) {
3292     // Set VL (rs1 != X0).
3293     Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3294     MIB.addReg(DestReg, RegState::Define | RegState::Dead)
3295         .addReg(MI.getOperand(VLIndex).getReg());
3296   } else
3297     // With no VL operand in the pseudo, do not modify VL (rd = X0, rs1 = X0).
3298     MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead)
3299         .addReg(RISCV::X0, RegState::Kill);
3300 
3301   // Default to tail agnostic unless the destination is tied to a source. In
3302   // that case the user would have some control over the tail values. The tail
3303   // policy is also ignored on instructions that only update element 0, such as
3304   // vmv.s.x or reductions, so use agnostic there to match the common case.
3305   // FIXME: This is conservatively correct, but we might want to detect that
3306   // the input is undefined.
3307   bool TailAgnostic = true;
3308   unsigned UseOpIdx;
3309   if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
3310     TailAgnostic = false;
3311     // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
3312     const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
3313     MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
3314     if (UseMI && UseMI->isImplicitDef())
3315       TailAgnostic = true;
3316   }
3317 
3318   // For simplicity we reuse the vtype representation here.
3319   MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
3320                                      /*TailAgnostic*/ TailAgnostic,
3321                                      /*MaskAgnostic*/ false));
3322 
3323   // Remove (now) redundant operands from pseudo
3324   MI.getOperand(SEWIndex).setImm(-1);
3325   if (VLIndex >= 0) {
3326     MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
3327     MI.getOperand(VLIndex).setIsKill(false);
3328   }
3329 
3330   return BB;
3331 }
3332 
3333 MachineBasicBlock *
3334 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3335                                                  MachineBasicBlock *BB) const {
3336   uint64_t TSFlags = MI.getDesc().TSFlags;
3337 
3338   if (TSFlags & RISCVII::HasSEWOpMask) {
3339     unsigned NumOperands = MI.getNumExplicitOperands();
3340     int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
3341     unsigned SEWIndex = NumOperands - 1;
3342     bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask;
3343 
3344     RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
3345                                                RISCVII::VLMulShift);
3346     return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
3347   }
3348 
3349   switch (MI.getOpcode()) {
3350   default:
3351     llvm_unreachable("Unexpected instr type to insert");
3352   case RISCV::ReadCycleWide:
3353     assert(!Subtarget.is64Bit() &&
3354            "ReadCycleWide is only to be used on riscv32");
3355     return emitReadCycleWidePseudo(MI, BB);
3356   case RISCV::Select_GPR_Using_CC_GPR:
3357   case RISCV::Select_FPR16_Using_CC_GPR:
3358   case RISCV::Select_FPR32_Using_CC_GPR:
3359   case RISCV::Select_FPR64_Using_CC_GPR:
3360     return emitSelectPseudo(MI, BB);
3361   case RISCV::BuildPairF64Pseudo:
3362     return emitBuildPairF64Pseudo(MI, BB);
3363   case RISCV::SplitF64Pseudo:
3364     return emitSplitF64Pseudo(MI, BB);
3365   }
3366 }
3367 
3368 // Calling Convention Implementation.
3369 // The expectations for frontend ABI lowering vary from target to target.
3370 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
3371 // details, but this is a longer term goal. For now, we simply try to keep the
3372 // role of the frontend as simple and well-defined as possible. The rules can
3373 // be summarised as:
3374 // * Never split up large scalar arguments. We handle them here.
3375 // * If a hardfloat calling convention is being used, and the struct may be
3376 // passed in a pair of registers (fp+fp, int+fp), and both registers are
3377 // available, then pass as two separate arguments. If either the GPRs or FPRs
3378 // are exhausted, then pass according to the rule below.
3379 // * If a struct could never be passed in registers or directly in a stack
3380 // slot (as it is larger than 2*XLEN and the floating point rules don't
3381 // apply), then pass it using a pointer with the byval attribute.
3382 // * If a struct is less than 2*XLEN, then coerce to either a two-element
3383 // word-sized array or a 2*XLEN scalar (depending on alignment).
3384 // * The frontend can determine whether a struct is returned by reference or
3385 // not based on its size and fields. If it will be returned by reference, the
3386 // frontend must modify the prototype so a pointer with the sret annotation is
3387 // passed as the first argument. This is not necessary for large scalar
3388 // returns.
3389 // * Struct return values and varargs should be coerced to structs containing
3390 // register-size fields in the same situations they would be for fixed
3391 // arguments.
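// For illustration (an assumed ILP32D example): a struct holding one double
// and one int may be passed as two separate arguments, one in an FPR and one
// in a GPR, falling back to the rules above once either register file is
// exhausted.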
3392 
3393 static const MCPhysReg ArgGPRs[] = {
3394   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
3395   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
3396 };
3397 static const MCPhysReg ArgFPR16s[] = {
3398   RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
3399   RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
3400 };
3401 static const MCPhysReg ArgFPR32s[] = {
3402   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
3403   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
3404 };
3405 static const MCPhysReg ArgFPR64s[] = {
3406   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
3407   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
3408 };
3409 // This is an interim calling convention and it may be changed in the future.
3410 static const MCPhysReg ArgVRs[] = {
3411     RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
3412     RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
3413     RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
3414 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
3415                                      RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
3416                                      RISCV::V20M2, RISCV::V22M2};
3417 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
3418                                      RISCV::V20M4};
3419 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
3420 
3421 // Pass a 2*XLEN argument that has been split into two XLEN values through
3422 // registers or the stack as necessary.
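// For example, on RV32 an i64 argument split into two i32 halves may end up
// entirely in registers, split between the last free GPR and the stack, or
// entirely on the stack, depending on how many argument GPRs remain.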
3423 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
3424                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
3425                                 MVT ValVT2, MVT LocVT2,
3426                                 ISD::ArgFlagsTy ArgFlags2) {
3427   unsigned XLenInBytes = XLen / 8;
3428   if (Register Reg = State.AllocateReg(ArgGPRs)) {
3429     // At least one half can be passed via register.
3430     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
3431                                      VA1.getLocVT(), CCValAssign::Full));
3432   } else {
3433     // Both halves must be passed on the stack, with proper alignment.
3434     Align StackAlign =
3435         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
3436     State.addLoc(
3437         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
3438                             State.AllocateStack(XLenInBytes, StackAlign),
3439                             VA1.getLocVT(), CCValAssign::Full));
3440     State.addLoc(CCValAssign::getMem(
3441         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
3442         LocVT2, CCValAssign::Full));
3443     return false;
3444   }
3445 
3446   if (Register Reg = State.AllocateReg(ArgGPRs)) {
3447     // The second half can also be passed via register.
3448     State.addLoc(
3449         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
3450   } else {
3451     // The second half is passed via the stack, without additional alignment.
3452     State.addLoc(CCValAssign::getMem(
3453         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
3454         LocVT2, CCValAssign::Full));
3455   }
3456 
3457   return false;
3458 }
3459 
3460 // Implements the RISC-V calling convention. Returns true upon failure.
3461 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
3462                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
3463                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
3464                      bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
3465                      Optional<unsigned> FirstMaskArgument) {
3466   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
3467   assert(XLen == 32 || XLen == 64);
3468   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
3469 
3470   // Any return value split in to more than two values can't be returned
3471   // directly.
3472   if (IsRet && ValNo > 1)
3473     return true;
3474 
3475   // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if
3476   // passing a variadic argument, or if no F16/F32 argument registers are available.
3477   bool UseGPRForF16_F32 = true;
3478   // UseGPRForF64 is true if targeting soft-float ABIs or an FLEN=32 ABI, if
3479   // passing a variadic argument, or if no F64 argument registers are available.
3480   bool UseGPRForF64 = true;
3481 
3482   switch (ABI) {
3483   default:
3484     llvm_unreachable("Unexpected ABI");
3485   case RISCVABI::ABI_ILP32:
3486   case RISCVABI::ABI_LP64:
3487     break;
3488   case RISCVABI::ABI_ILP32F:
3489   case RISCVABI::ABI_LP64F:
3490     UseGPRForF16_F32 = !IsFixed;
3491     break;
3492   case RISCVABI::ABI_ILP32D:
3493   case RISCVABI::ABI_LP64D:
3494     UseGPRForF16_F32 = !IsFixed;
3495     UseGPRForF64 = !IsFixed;
3496     break;
3497   }
3498 
3499   // FPR16, FPR32, and FPR64 alias each other.
3500   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
3501     UseGPRForF16_F32 = true;
3502     UseGPRForF64 = true;
3503   }
3504 
3505   // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
3506   // similar local variables rather than directly checking against the target
3507   // ABI.
3508 
3509   if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
3510     LocVT = XLenVT;
3511     LocInfo = CCValAssign::BCvt;
3512   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
3513     LocVT = MVT::i64;
3514     LocInfo = CCValAssign::BCvt;
3515   }
3516 
3517   // If this is a variadic argument, the RISC-V calling convention requires
3518   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
3519   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
3520   // be used regardless of whether the original argument was split during
3521   // legalisation or not. The argument will not be passed by registers if the
3522   // original type is larger than 2*XLEN, so the register alignment rule does
3523   // not apply.
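  // For example, a variadic double on RV32 (8 bytes, 8-byte aligned) goes in
  // an aligned register pair such as a2/a3; if the next free register would
  // be a1, then a1 is skipped.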
3524   unsigned TwoXLenInBytes = (2 * XLen) / 8;
3525   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
3526       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
3527     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
3528     // Skip 'odd' register if necessary.
3529     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
3530       State.AllocateReg(ArgGPRs);
3531   }
3532 
3533   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
3534   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
3535       State.getPendingArgFlags();
3536 
3537   assert(PendingLocs.size() == PendingArgFlags.size() &&
3538          "PendingLocs and PendingArgFlags out of sync");
3539 
3540   // Handle passing f64 on RV32D with a soft float ABI or when floating point
3541   // registers are exhausted.
3542   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
3543     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
3544            "Can't lower f64 if it is split");
3545     // Depending on available argument GPRS, f64 may be passed in a pair of
3546     // GPRs, split between a GPR and the stack, or passed completely on the
3547     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
3548     // cases.
3549     Register Reg = State.AllocateReg(ArgGPRs);
3550     LocVT = MVT::i32;
3551     if (!Reg) {
3552       unsigned StackOffset = State.AllocateStack(8, Align(8));
3553       State.addLoc(
3554           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3555       return false;
3556     }
3557     if (!State.AllocateReg(ArgGPRs))
3558       State.AllocateStack(4, Align(4));
3559     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3560     return false;
3561   }
3562 
3563   // Split arguments might be passed indirectly, so keep track of the pending
3564   // values.
3565   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
3566     LocVT = XLenVT;
3567     LocInfo = CCValAssign::Indirect;
3568     PendingLocs.push_back(
3569         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
3570     PendingArgFlags.push_back(ArgFlags);
3571     if (!ArgFlags.isSplitEnd()) {
3572       return false;
3573     }
3574   }
3575 
3576   // If the split argument only had two elements, it should be passed directly
3577   // in registers or on the stack.
3578   if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
3579     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3580     // Apply the normal calling convention rules to the first half of the
3581     // split argument.
3582     CCValAssign VA = PendingLocs[0];
3583     ISD::ArgFlagsTy AF = PendingArgFlags[0];
3584     PendingLocs.clear();
3585     PendingArgFlags.clear();
3586     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
3587                                ArgFlags);
3588   }
3589 
3590   // Allocate to a register if possible, or else a stack slot.
3591   Register Reg;
3592   if (ValVT == MVT::f16 && !UseGPRForF16_F32)
3593     Reg = State.AllocateReg(ArgFPR16s);
3594   else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
3595     Reg = State.AllocateReg(ArgFPR32s);
3596   else if (ValVT == MVT::f64 && !UseGPRForF64)
3597     Reg = State.AllocateReg(ArgFPR64s);
3598   else if (ValVT.isScalableVector()) {
3599     const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
3600     if (RC == &RISCV::VRRegClass) {
3601       // Assign the first mask argument to V0.
3602       // This is an interim calling convention and it may be changed in the
3603       // future.
3604       if (FirstMaskArgument.hasValue() &&
3605           ValNo == FirstMaskArgument.getValue()) {
3606         Reg = State.AllocateReg(RISCV::V0);
3607       } else {
3608         Reg = State.AllocateReg(ArgVRs);
3609       }
3610     } else if (RC == &RISCV::VRM2RegClass) {
3611       Reg = State.AllocateReg(ArgVRM2s);
3612     } else if (RC == &RISCV::VRM4RegClass) {
3613       Reg = State.AllocateReg(ArgVRM4s);
3614     } else if (RC == &RISCV::VRM8RegClass) {
3615       Reg = State.AllocateReg(ArgVRM8s);
3616     } else {
3617       llvm_unreachable("Unhandled class register for ValueType");
3618     }
3619     if (!Reg) {
3620       LocInfo = CCValAssign::Indirect;
3621       // Try using a GPR to pass the address
3622       Reg = State.AllocateReg(ArgGPRs);
3623       LocVT = XLenVT;
3624     }
3625   } else
3626     Reg = State.AllocateReg(ArgGPRs);
3627   unsigned StackOffset =
3628       Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));
3629 
3630   // If we reach this point and PendingLocs is non-empty, we must be at the
3631   // end of a split argument that must be passed indirectly.
3632   if (!PendingLocs.empty()) {
3633     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3634     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3635 
3636     for (auto &It : PendingLocs) {
3637       if (Reg)
3638         It.convertToReg(Reg);
3639       else
3640         It.convertToMem(StackOffset);
3641       State.addLoc(It);
3642     }
3643     PendingLocs.clear();
3644     PendingArgFlags.clear();
3645     return false;
3646   }
3647 
3648   assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
3649           (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) &&
3650          "Expected an XLenVT or a scalable vector type at this stage");
3651 
3652   if (Reg) {
3653     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3654     return false;
3655   }
3656 
3657   // When a floating-point value is passed on the stack, no bit-conversion is
3658   // needed.
3659   if (ValVT.isFloatingPoint()) {
3660     LocVT = ValVT;
3661     LocInfo = CCValAssign::Full;
3662   }
3663   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3664   return false;
3665 }
3666 
3667 template <typename ArgTy>
3668 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
3669   for (const auto &ArgIdx : enumerate(Args)) {
3670     MVT ArgVT = ArgIdx.value().VT;
3671     if (ArgVT.isScalableVector() &&
3672         ArgVT.getVectorElementType().SimpleTy == MVT::i1)
3673       return ArgIdx.index();
3674   }
3675   return None;
3676 }
3677 
3678 void RISCVTargetLowering::analyzeInputArgs(
3679     MachineFunction &MF, CCState &CCInfo,
3680     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
3681   unsigned NumArgs = Ins.size();
3682   FunctionType *FType = MF.getFunction().getFunctionType();
3683 
3684   Optional<unsigned> FirstMaskArgument;
3685   if (Subtarget.hasStdExtV())
3686     FirstMaskArgument = preAssignMask(Ins);
3687 
3688   for (unsigned i = 0; i != NumArgs; ++i) {
3689     MVT ArgVT = Ins[i].VT;
3690     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
3691 
3692     Type *ArgTy = nullptr;
3693     if (IsRet)
3694       ArgTy = FType->getReturnType();
3695     else if (Ins[i].isOrigArg())
3696       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
3697 
3698     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
3699     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
3700                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
3701                  FirstMaskArgument)) {
3702       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
3703                         << EVT(ArgVT).getEVTString() << '\n');
3704       llvm_unreachable(nullptr);
3705     }
3706   }
3707 }
3708 
3709 void RISCVTargetLowering::analyzeOutputArgs(
3710     MachineFunction &MF, CCState &CCInfo,
3711     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3712     CallLoweringInfo *CLI) const {
3713   unsigned NumArgs = Outs.size();
3714 
3715   Optional<unsigned> FirstMaskArgument;
3716   if (Subtarget.hasStdExtV())
3717     FirstMaskArgument = preAssignMask(Outs);
3718 
3719   for (unsigned i = 0; i != NumArgs; i++) {
3720     MVT ArgVT = Outs[i].VT;
3721     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3722     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3723 
3724     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
3725     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
3726                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
3727                  FirstMaskArgument)) {
3728       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
3729                         << EVT(ArgVT).getEVTString() << "\n");
3730       llvm_unreachable(nullptr);
3731     }
3732   }
3733 }
3734 
3735 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
3736 // values.
3737 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3738                                    const CCValAssign &VA, const SDLoc &DL) {
3739   switch (VA.getLocInfo()) {
3740   default:
3741     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3742   case CCValAssign::Full:
3743     break;
3744   case CCValAssign::BCvt:
3745     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
3746       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
3747     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3748       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
3749     else
3750       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3751     break;
3752   }
3753   return Val;
3754 }
3755 
3756 // The caller is responsible for loading the full value if the argument is
3757 // passed with CCValAssign::Indirect.
3758 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3759                                 const CCValAssign &VA, const SDLoc &DL,
3760                                 const RISCVTargetLowering &TLI) {
3761   MachineFunction &MF = DAG.getMachineFunction();
3762   MachineRegisterInfo &RegInfo = MF.getRegInfo();
3763   EVT LocVT = VA.getLocVT();
3764   SDValue Val;
3765   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
3766   Register VReg = RegInfo.createVirtualRegister(RC);
3767   RegInfo.addLiveIn(VA.getLocReg(), VReg);
3768   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
3769 
3770   if (VA.getLocInfo() == CCValAssign::Indirect)
3771     return Val;
3772 
3773   return convertLocVTToValVT(DAG, Val, VA, DL);
3774 }
3775 
3776 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
3777                                    const CCValAssign &VA, const SDLoc &DL) {
3778   EVT LocVT = VA.getLocVT();
3779 
3780   switch (VA.getLocInfo()) {
3781   default:
3782     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3783   case CCValAssign::Full:
3784     break;
3785   case CCValAssign::BCvt:
3786     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
3787       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
3788     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3789       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
3790     else
3791       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
3792     break;
3793   }
3794   return Val;
3795 }
3796 
3797 // The caller is responsible for loading the full value if the argument is
3798 // passed with CCValAssign::Indirect.
3799 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3800                                 const CCValAssign &VA, const SDLoc &DL) {
3801   MachineFunction &MF = DAG.getMachineFunction();
3802   MachineFrameInfo &MFI = MF.getFrameInfo();
3803   EVT LocVT = VA.getLocVT();
3804   EVT ValVT = VA.getValVT();
3805   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
3806   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3807                                  VA.getLocMemOffset(), /*Immutable=*/true);
3808   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3809   SDValue Val;
3810 
3811   ISD::LoadExtType ExtType;
3812   switch (VA.getLocInfo()) {
3813   default:
3814     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3815   case CCValAssign::Full:
3816   case CCValAssign::Indirect:
3817   case CCValAssign::BCvt:
3818     ExtType = ISD::NON_EXTLOAD;
3819     break;
3820   }
3821   Val = DAG.getExtLoad(
3822       ExtType, DL, LocVT, Chain, FIN,
3823       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3824   return Val;
3825 }
3826 
3827 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
3828                                        const CCValAssign &VA, const SDLoc &DL) {
3829   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
3830          "Unexpected VA");
3831   MachineFunction &MF = DAG.getMachineFunction();
3832   MachineFrameInfo &MFI = MF.getFrameInfo();
3833   MachineRegisterInfo &RegInfo = MF.getRegInfo();
3834 
3835   if (VA.isMemLoc()) {
3836     // f64 is passed on the stack.
3837     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
3838     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3839     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
3840                        MachinePointerInfo::getFixedStack(MF, FI));
3841   }
3842 
3843   assert(VA.isRegLoc() && "Expected register VA assignment");
3844 
3845   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3846   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
3847   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
3848   SDValue Hi;
3849   if (VA.getLocReg() == RISCV::X17) {
3850     // Second half of f64 is passed on the stack.
3851     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
3852     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3853     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
3854                      MachinePointerInfo::getFixedStack(MF, FI));
3855   } else {
3856     // Second half of f64 is passed in another GPR.
3857     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3858     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
3859     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
3860   }
3861   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
3862 }
3863 
3864 // FastCC gives less than a 1% performance improvement on some particular
3865 // benchmarks, but in theory it may still benefit other cases.
3866 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
3867                             CCValAssign::LocInfo LocInfo,
3868                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
3869 
3870   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3871     // X5 and X6 might be used by the save-restore libcalls.
3872     static const MCPhysReg GPRList[] = {
3873         RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
3874         RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
3875         RISCV::X29, RISCV::X30, RISCV::X31};
3876     if (unsigned Reg = State.AllocateReg(GPRList)) {
3877       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3878       return false;
3879     }
3880   }
3881 
3882   if (LocVT == MVT::f16) {
3883     static const MCPhysReg FPR16List[] = {
3884         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
3885         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
3886         RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
3887         RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
3888     if (unsigned Reg = State.AllocateReg(FPR16List)) {
3889       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3890       return false;
3891     }
3892   }
3893 
3894   if (LocVT == MVT::f32) {
3895     static const MCPhysReg FPR32List[] = {
3896         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
3897         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
3898         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
3899         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
3900     if (unsigned Reg = State.AllocateReg(FPR32List)) {
3901       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3902       return false;
3903     }
3904   }
3905 
3906   if (LocVT == MVT::f64) {
3907     static const MCPhysReg FPR64List[] = {
3908         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
3909         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
3910         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
3911         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
3912     if (unsigned Reg = State.AllocateReg(FPR64List)) {
3913       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3914       return false;
3915     }
3916   }
3917 
3918   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
3919     unsigned Offset4 = State.AllocateStack(4, Align(4));
3920     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
3921     return false;
3922   }
3923 
3924   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
3925     unsigned Offset5 = State.AllocateStack(8, Align(8));
3926     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
3927     return false;
3928   }
3929 
3930   return true; // CC didn't match.
3931 }
3932 
3933 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
3934                          CCValAssign::LocInfo LocInfo,
3935                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
3936 
3937   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3938     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
3939     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
3940     static const MCPhysReg GPRList[] = {
3941         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
3942         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
3943     if (unsigned Reg = State.AllocateReg(GPRList)) {
3944       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3945       return false;
3946     }
3947   }
3948 
3949   if (LocVT == MVT::f32) {
3950     // Pass in STG registers: F1, ..., F6
3951     //                        fs0 ... fs5
3952     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
3953                                           RISCV::F18_F, RISCV::F19_F,
3954                                           RISCV::F20_F, RISCV::F21_F};
3955     if (unsigned Reg = State.AllocateReg(FPR32List)) {
3956       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3957       return false;
3958     }
3959   }
3960 
3961   if (LocVT == MVT::f64) {
3962     // Pass in STG registers: D1, ..., D6
3963     //                        fs6 ... fs11
3964     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
3965                                           RISCV::F24_D, RISCV::F25_D,
3966                                           RISCV::F26_D, RISCV::F27_D};
3967     if (unsigned Reg = State.AllocateReg(FPR64List)) {
3968       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3969       return false;
3970     }
3971   }
3972 
3973   report_fatal_error("No registers left in GHC calling convention");
3974   return true;
3975 }
3976 
3977 // Transform physical registers into virtual registers.
3978 SDValue RISCVTargetLowering::LowerFormalArguments(
3979     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3980     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3981     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3982 
3983   MachineFunction &MF = DAG.getMachineFunction();
3984 
3985   switch (CallConv) {
3986   default:
3987     report_fatal_error("Unsupported calling convention");
3988   case CallingConv::C:
3989   case CallingConv::Fast:
3990     break;
3991   case CallingConv::GHC:
3992     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
3993         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
3994       report_fatal_error(
3995         "GHC calling convention requires the F and D instruction set extensions");
3996   }
3997 
3998   const Function &Func = MF.getFunction();
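  // Interrupt handlers are declared in user code with the RISC-V "interrupt"
  // attribute, e.g. (hypothetical example):
  //   __attribute__((interrupt("machine"))) void handler(void);
  // Such functions may not take arguments, and the attribute value later
  // selects the return instruction emitted in LowerReturn.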
3999   if (Func.hasFnAttribute("interrupt")) {
4000     if (!Func.arg_empty())
4001       report_fatal_error(
4002         "Functions with the interrupt attribute cannot have arguments!");
4003 
4004     StringRef Kind =
4005       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
4006 
4007     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
4008       report_fatal_error(
4009         "Function interrupt attribute argument not supported!");
4010   }
4011 
4012   EVT PtrVT = getPointerTy(DAG.getDataLayout());
4013   MVT XLenVT = Subtarget.getXLenVT();
4014   unsigned XLenInBytes = Subtarget.getXLen() / 8;
4015   // Used with varargs to accumulate store chains.
4016   std::vector<SDValue> OutChains;
4017 
4018   // Assign locations to all of the incoming arguments.
4019   SmallVector<CCValAssign, 16> ArgLocs;
4020   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4021 
4022   if (CallConv == CallingConv::Fast)
4023     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
4024   else if (CallConv == CallingConv::GHC)
4025     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
4026   else
4027     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
4028 
4029   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4030     CCValAssign &VA = ArgLocs[i];
4031     SDValue ArgValue;
4032     // Passing f64 on RV32D with a soft float ABI must be handled as a special
4033     // case.
4034     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
4035       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
4036     else if (VA.isRegLoc())
4037       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
4038     else
4039       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
4040 
4041     if (VA.getLocInfo() == CCValAssign::Indirect) {
4042       // If the original argument was split and passed by reference (e.g. i128
4043       // on RV32), we need to load all parts of it here (using the same
4044       // address).
4045       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
4046                                    MachinePointerInfo()));
4047       unsigned ArgIndex = Ins[i].OrigArgIndex;
4048       assert(Ins[i].PartOffset == 0);
4049       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
4050         CCValAssign &PartVA = ArgLocs[i + 1];
4051         unsigned PartOffset = Ins[i + 1].PartOffset;
4052         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
4053                                       DAG.getIntPtrConstant(PartOffset, DL));
4054         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
4055                                      MachinePointerInfo()));
4056         ++i;
4057       }
4058       continue;
4059     }
4060     InVals.push_back(ArgValue);
4061   }
4062 
4063   if (IsVarArg) {
4064     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
4065     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
4066     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
4067     MachineFrameInfo &MFI = MF.getFrameInfo();
4068     MachineRegisterInfo &RegInfo = MF.getRegInfo();
4069     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
4070 
4071     // Offset of the first variable argument from stack pointer, and size of
4072     // the vararg save area. For now, the varargs save area is either zero or
4073     // large enough to hold a0-a7.
4074     int VaArgOffset, VarArgsSaveSize;
4075 
4076     // If all registers are allocated, then all varargs must be passed on the
4077     // stack and we don't need to save any argregs.
4078     if (ArgRegs.size() == Idx) {
4079       VaArgOffset = CCInfo.getNextStackOffset();
4080       VarArgsSaveSize = 0;
4081     } else {
4082       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
4083       VaArgOffset = -VarArgsSaveSize;
4084     }
4085 
4086     // Record the frame index of the first variable argument,
4087     // which is needed by VASTART.
4088     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
4089     RVFI->setVarArgsFrameIndex(FI);
4090 
4091     // If saving an odd number of registers, create an extra stack slot to
4092     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
4093     // offsets to even-numbered registers remain 2*XLEN-aligned.
4094     if (Idx % 2) {
4095       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
4096       VarArgsSaveSize += XLenInBytes;
4097     }
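    // For example, in an RV64 vararg function whose fixed arguments occupy
    // a0-a2, the remaining five registers a3-a7 are spilled (5 * 8 bytes) and,
    // since an odd number of registers is saved, one extra 8-byte slot is
    // added, giving a 48-byte save area that preserves 2*XLEN alignment.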
4098 
4099     // Copy the integer registers that may have been used for passing varargs
4100     // to the vararg save area.
4101     for (unsigned I = Idx; I < ArgRegs.size();
4102          ++I, VaArgOffset += XLenInBytes) {
4103       const Register Reg = RegInfo.createVirtualRegister(RC);
4104       RegInfo.addLiveIn(ArgRegs[I], Reg);
4105       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
4106       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
4107       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4108       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
4109                                    MachinePointerInfo::getFixedStack(MF, FI));
4110       cast<StoreSDNode>(Store.getNode())
4111           ->getMemOperand()
4112           ->setValue((Value *)nullptr);
4113       OutChains.push_back(Store);
4114     }
4115     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
4116   }
4117 
4118   // All stores are grouped in one node to allow matching between the sizes
4119   // of Ins and InVals. This only happens for vararg functions.
4120   if (!OutChains.empty()) {
4121     OutChains.push_back(Chain);
4122     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
4123   }
4124 
4125   return Chain;
4126 }
4127 
4128 /// isEligibleForTailCallOptimization - Check whether the call is eligible
4129 /// for tail call optimization.
4130 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
4131 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
4132     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
4133     const SmallVector<CCValAssign, 16> &ArgLocs) const {
4134 
4135   auto &Callee = CLI.Callee;
4136   auto CalleeCC = CLI.CallConv;
4137   auto &Outs = CLI.Outs;
4138   auto &Caller = MF.getFunction();
4139   auto CallerCC = Caller.getCallingConv();
4140 
4141   // Exception-handling functions need a special set of instructions to
4142   // indicate a return to the hardware. Tail-calling another function would
4143   // probably break this.
4144   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
4145   // should be expanded as new function attributes are introduced.
4146   if (Caller.hasFnAttribute("interrupt"))
4147     return false;
4148 
4149   // Do not tail call opt if the stack is used to pass parameters.
4150   if (CCInfo.getNextStackOffset() != 0)
4151     return false;
4152 
4153   // Do not tail call opt if any parameters need to be passed indirectly.
4154   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
4155   // passed indirectly. So the address of the value will be passed in a
4156   // register, or if not available, then the address is put on the stack.
4157   // Passing indirectly often requires allocating stack space to hold the
4158   // value, so the CCInfo.getNextStackOffset() != 0 check alone is not
4159   // sufficient; we also need to check whether any of the CCValAssigns in
4160   // ArgLocs are CCValAssign::Indirect.
4161   for (auto &VA : ArgLocs)
4162     if (VA.getLocInfo() == CCValAssign::Indirect)
4163       return false;
4164 
4165   // Do not tail call opt if either caller or callee uses struct return
4166   // semantics.
4167   auto IsCallerStructRet = Caller.hasStructRetAttr();
4168   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
4169   if (IsCallerStructRet || IsCalleeStructRet)
4170     return false;
4171 
4172   // Externally-defined functions with weak linkage should not be
4173   // tail-called. The behaviour of branch instructions in this situation (as
4174   // used for tail calls) is implementation-defined, so we cannot rely on the
4175   // linker replacing the tail call with a return.
4176   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4177     const GlobalValue *GV = G->getGlobal();
4178     if (GV->hasExternalWeakLinkage())
4179       return false;
4180   }
4181 
4182   // The callee has to preserve all registers the caller needs to preserve.
4183   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
4184   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4185   if (CalleeCC != CallerCC) {
4186     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4187     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4188       return false;
4189   }
4190 
4191   // Byval parameters hand the function a pointer directly into the stack area
4192   // we want to reuse during a tail call. Working around this *is* possible
4193   // but less efficient and uglier in LowerCall.
4194   for (auto &Arg : Outs)
4195     if (Arg.Flags.isByVal())
4196       return false;
4197 
4198   return true;
4199 }
4200 
4201 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
4202 // and output parameter nodes.
4203 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
4204                                        SmallVectorImpl<SDValue> &InVals) const {
4205   SelectionDAG &DAG = CLI.DAG;
4206   SDLoc &DL = CLI.DL;
4207   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4208   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4209   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4210   SDValue Chain = CLI.Chain;
4211   SDValue Callee = CLI.Callee;
4212   bool &IsTailCall = CLI.IsTailCall;
4213   CallingConv::ID CallConv = CLI.CallConv;
4214   bool IsVarArg = CLI.IsVarArg;
4215   EVT PtrVT = getPointerTy(DAG.getDataLayout());
4216   MVT XLenVT = Subtarget.getXLenVT();
4217 
4218   MachineFunction &MF = DAG.getMachineFunction();
4219 
4220   // Analyze the operands of the call, assigning locations to each operand.
4221   SmallVector<CCValAssign, 16> ArgLocs;
4222   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4223 
4224   if (CallConv == CallingConv::Fast)
4225     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
4226   else if (CallConv == CallingConv::GHC)
4227     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
4228   else
4229     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
4230 
4231   // Check if it's really possible to do a tail call.
4232   if (IsTailCall)
4233     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
4234 
4235   if (IsTailCall)
4236     ++NumTailCalls;
4237   else if (CLI.CB && CLI.CB->isMustTailCall())
4238     report_fatal_error("failed to perform tail call elimination on a call "
4239                        "site marked musttail");
4240 
4241   // Get a count of how many bytes are to be pushed on the stack.
4242   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
4243 
4244   // Create local copies for byval args
4245   SmallVector<SDValue, 8> ByValArgs;
4246   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4247     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4248     if (!Flags.isByVal())
4249       continue;
4250 
4251     SDValue Arg = OutVals[i];
4252     unsigned Size = Flags.getByValSize();
4253     Align Alignment = Flags.getNonZeroByValAlign();
4254 
4255     int FI =
4256         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
4257     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4258     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
4259 
4260     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
4261                           /*IsVolatile=*/false,
4262                           /*AlwaysInline=*/false, IsTailCall,
4263                           MachinePointerInfo(), MachinePointerInfo());
4264     ByValArgs.push_back(FIPtr);
4265   }
4266 
4267   if (!IsTailCall)
4268     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
4269 
4270   // Copy argument values to their designated locations.
4271   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4272   SmallVector<SDValue, 8> MemOpChains;
4273   SDValue StackPtr;
4274   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
4275     CCValAssign &VA = ArgLocs[i];
4276     SDValue ArgValue = OutVals[i];
4277     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4278 
4279     // Handle passing f64 on RV32D with a soft float ABI as a special case.
4280     bool IsF64OnRV32DSoftABI =
4281         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
4282     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
4283       SDValue SplitF64 = DAG.getNode(
4284           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
4285       SDValue Lo = SplitF64.getValue(0);
4286       SDValue Hi = SplitF64.getValue(1);
4287 
4288       Register RegLo = VA.getLocReg();
4289       RegsToPass.push_back(std::make_pair(RegLo, Lo));
4290 
4291       if (RegLo == RISCV::X17) {
4292         // Second half of f64 is passed on the stack.
4293         // Work out the address of the stack slot.
4294         if (!StackPtr.getNode())
4295           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
4296         // Emit the store.
4297         MemOpChains.push_back(
4298             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
4299       } else {
4300         // Second half of f64 is passed in another GPR.
4301         assert(RegLo < RISCV::X31 && "Invalid register pair");
4302         Register RegHigh = RegLo + 1;
4303         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
4304       }
4305       continue;
4306     }
4307 
4308     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
4309     // as any other MemLoc.
4310 
4311     // Promote the value if needed.
4312     // For now, only handle fully promoted and indirect arguments.
4313     if (VA.getLocInfo() == CCValAssign::Indirect) {
4314       // Store the argument in a stack slot and pass its address.
4315       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
4316       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4317       MemOpChains.push_back(
4318           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
4319                        MachinePointerInfo::getFixedStack(MF, FI)));
4320       // If the original argument was split (e.g. i128), we need
4321       // to store all parts of it here (and pass just one address).
4322       unsigned ArgIndex = Outs[i].OrigArgIndex;
4323       assert(Outs[i].PartOffset == 0);
4324       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
4325         SDValue PartValue = OutVals[i + 1];
4326         unsigned PartOffset = Outs[i + 1].PartOffset;
4327         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
4328                                       DAG.getIntPtrConstant(PartOffset, DL));
4329         MemOpChains.push_back(
4330             DAG.getStore(Chain, DL, PartValue, Address,
4331                          MachinePointerInfo::getFixedStack(MF, FI)));
4332         ++i;
4333       }
4334       ArgValue = SpillSlot;
4335     } else {
4336       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
4337     }
4338 
4339     // Use local copy if it is a byval arg.
4340     if (Flags.isByVal())
4341       ArgValue = ByValArgs[j++];
4342 
4343     if (VA.isRegLoc()) {
4344       // Queue up the argument copies and emit them at the end.
4345       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
4346     } else {
4347       assert(VA.isMemLoc() && "Argument not register or memory");
4348       assert(!IsTailCall && "Tail call not allowed if stack is used "
4349                             "for passing parameters");
4350 
4351       // Work out the address of the stack slot.
4352       if (!StackPtr.getNode())
4353         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
4354       SDValue Address =
4355           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
4356                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
4357 
4358       // Emit the store.
4359       MemOpChains.push_back(
4360           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
4361     }
4362   }
4363 
4364   // Join the stores, which are independent of one another.
4365   if (!MemOpChains.empty())
4366     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4367 
4368   SDValue Glue;
4369 
4370   // Build a sequence of copy-to-reg nodes, chained and glued together.
4371   for (auto &Reg : RegsToPass) {
4372     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
4373     Glue = Chain.getValue(1);
4374   }
4375 
4376   // Validate that none of the argument registers have been marked as
4377   // reserved; if so, report an error. Do the same for the return address if
4378   // this is not a tail call.
4379   validateCCReservedRegs(RegsToPass, MF);
4380   if (!IsTailCall &&
4381       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
4382     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
4383         MF.getFunction(),
4384         "Return address register required, but has been reserved."});
4385 
4386   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
4387   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
4388   // split it, allowing the direct call to be matched by PseudoCALL.
4389   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
4390     const GlobalValue *GV = S->getGlobal();
4391 
4392     unsigned OpFlags = RISCVII::MO_CALL;
4393     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
4394       OpFlags = RISCVII::MO_PLT;
4395 
4396     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
4397   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4398     unsigned OpFlags = RISCVII::MO_CALL;
4399 
4400     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
4401                                                  nullptr))
4402       OpFlags = RISCVII::MO_PLT;
4403 
4404     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
4405   }
4406 
4407   // The first call operand is the chain and the second is the target address.
4408   SmallVector<SDValue, 8> Ops;
4409   Ops.push_back(Chain);
4410   Ops.push_back(Callee);
4411 
4412   // Add argument registers to the end of the list so that they are
4413   // known live into the call.
4414   for (auto &Reg : RegsToPass)
4415     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
4416 
4417   if (!IsTailCall) {
4418     // Add a register mask operand representing the call-preserved registers.
4419     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4420     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
4421     assert(Mask && "Missing call preserved mask for calling convention");
4422     Ops.push_back(DAG.getRegisterMask(Mask));
4423   }
4424 
4425   // Glue the call to the argument copies, if any.
4426   if (Glue.getNode())
4427     Ops.push_back(Glue);
4428 
4429   // Emit the call.
4430   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4431 
4432   if (IsTailCall) {
4433     MF.getFrameInfo().setHasTailCall();
4434     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
4435   }
4436 
4437   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
4438   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4439   Glue = Chain.getValue(1);
4440 
4441   // Mark the end of the call, which is glued to the call itself.
4442   Chain = DAG.getCALLSEQ_END(Chain,
4443                              DAG.getConstant(NumBytes, DL, PtrVT, true),
4444                              DAG.getConstant(0, DL, PtrVT, true),
4445                              Glue, DL);
4446   Glue = Chain.getValue(1);
4447 
4448   // Assign locations to each value returned by this call.
4449   SmallVector<CCValAssign, 16> RVLocs;
4450   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
4451   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
4452 
4453   // Copy all of the result registers out of their specified physreg.
4454   for (auto &VA : RVLocs) {
4455     // Copy the value out
4456     SDValue RetValue =
4457         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
4458     // Glue the RetValue to the end of the call sequence
4459     Chain = RetValue.getValue(1);
4460     Glue = RetValue.getValue(2);
4461 
4462     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
4463       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
4464       SDValue RetValue2 =
4465           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
4466       Chain = RetValue2.getValue(1);
4467       Glue = RetValue2.getValue(2);
4468       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
4469                              RetValue2);
4470     }
4471 
4472     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
4473 
4474     InVals.push_back(RetValue);
4475   }
4476 
4477   return Chain;
4478 }
4479 
4480 bool RISCVTargetLowering::CanLowerReturn(
4481     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
4482     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4483   SmallVector<CCValAssign, 16> RVLocs;
4484   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
4485 
4486   Optional<unsigned> FirstMaskArgument;
4487   if (Subtarget.hasStdExtV())
4488     FirstMaskArgument = preAssignMask(Outs);
4489 
4490   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4491     MVT VT = Outs[i].VT;
4492     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4493     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
4494     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
4495                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
4496                  *this, FirstMaskArgument))
4497       return false;
4498   }
4499   return true;
4500 }
4501 
4502 SDValue
4503 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
4504                                  bool IsVarArg,
4505                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
4506                                  const SmallVectorImpl<SDValue> &OutVals,
4507                                  const SDLoc &DL, SelectionDAG &DAG) const {
4508   const MachineFunction &MF = DAG.getMachineFunction();
4509   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
4510 
4511   // Stores the assignment of the return value to a location.
4512   SmallVector<CCValAssign, 16> RVLocs;
4513 
4514   // Info about the registers and stack slot.
4515   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
4516                  *DAG.getContext());
4517 
4518   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
4519                     nullptr);
4520 
4521   if (CallConv == CallingConv::GHC && !RVLocs.empty())
4522     report_fatal_error("GHC functions return void only");
4523 
4524   SDValue Glue;
4525   SmallVector<SDValue, 4> RetOps(1, Chain);
4526 
4527   // Copy the result values into the output registers.
4528   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
4529     SDValue Val = OutVals[i];
4530     CCValAssign &VA = RVLocs[i];
4531     assert(VA.isRegLoc() && "Can only return in registers!");
4532 
4533     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
4534       // Handle returning f64 on RV32D with a soft float ABI.
4535       assert(VA.isRegLoc() && "Expected return via registers");
4536       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
4537                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
4538       SDValue Lo = SplitF64.getValue(0);
4539       SDValue Hi = SplitF64.getValue(1);
4540       Register RegLo = VA.getLocReg();
4541       assert(RegLo < RISCV::X31 && "Invalid register pair");
4542       Register RegHi = RegLo + 1;
4543 
4544       if (STI.isRegisterReservedByUser(RegLo) ||
4545           STI.isRegisterReservedByUser(RegHi))
4546         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
4547             MF.getFunction(),
4548             "Return value register required, but has been reserved."});
4549 
4550       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
4551       Glue = Chain.getValue(1);
4552       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
4553       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
4554       Glue = Chain.getValue(1);
4555       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
4556     } else {
4557       // Handle a 'normal' return.
4558       Val = convertValVTToLocVT(DAG, Val, VA, DL);
4559       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
4560 
4561       if (STI.isRegisterReservedByUser(VA.getLocReg()))
4562         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
4563             MF.getFunction(),
4564             "Return value register required, but has been reserved."});
4565 
4566       // Guarantee that all emitted copies are stuck together.
4567       Glue = Chain.getValue(1);
4568       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4569     }
4570   }
4571 
4572   RetOps[0] = Chain; // Update chain.
4573 
4574   // Add the glue node if we have it.
4575   if (Glue.getNode()) {
4576     RetOps.push_back(Glue);
4577   }
4578 
4579   // Interrupt service routines use different return instructions.
4580   const Function &Func = DAG.getMachineFunction().getFunction();
4581   if (Func.hasFnAttribute("interrupt")) {
4582     if (!Func.getReturnType()->isVoidTy())
4583       report_fatal_error(
4584           "Functions with the interrupt attribute must have void return type!");
4585 
4586     MachineFunction &MF = DAG.getMachineFunction();
4587     StringRef Kind =
4588       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
4589 
4590     unsigned RetOpc;
4591     if (Kind == "user")
4592       RetOpc = RISCVISD::URET_FLAG;
4593     else if (Kind == "supervisor")
4594       RetOpc = RISCVISD::SRET_FLAG;
4595     else
4596       RetOpc = RISCVISD::MRET_FLAG;
4597 
4598     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
4599   }
4600 
4601   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
4602 }
4603 
4604 void RISCVTargetLowering::validateCCReservedRegs(
4605     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
4606     MachineFunction &MF) const {
4607   const Function &F = MF.getFunction();
4608   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
4609 
4610   if (llvm::any_of(Regs, [&STI](auto Reg) {
4611         return STI.isRegisterReservedByUser(Reg.first);
4612       }))
4613     F.getContext().diagnose(DiagnosticInfoUnsupported{
4614         F, "Argument register required, but has been reserved."});
4615 }
4616 
4617 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
4618   return CI->isTailCall();
4619 }
4620 
4621 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
4622 #define NODE_NAME_CASE(NODE)                                                   \
4623   case RISCVISD::NODE:                                                         \
4624     return "RISCVISD::" #NODE;
4625   // clang-format off
4626   switch ((RISCVISD::NodeType)Opcode) {
4627   case RISCVISD::FIRST_NUMBER:
4628     break;
4629   NODE_NAME_CASE(RET_FLAG)
4630   NODE_NAME_CASE(URET_FLAG)
4631   NODE_NAME_CASE(SRET_FLAG)
4632   NODE_NAME_CASE(MRET_FLAG)
4633   NODE_NAME_CASE(CALL)
4634   NODE_NAME_CASE(SELECT_CC)
4635   NODE_NAME_CASE(BuildPairF64)
4636   NODE_NAME_CASE(SplitF64)
4637   NODE_NAME_CASE(TAIL)
4638   NODE_NAME_CASE(SLLW)
4639   NODE_NAME_CASE(SRAW)
4640   NODE_NAME_CASE(SRLW)
4641   NODE_NAME_CASE(DIVW)
4642   NODE_NAME_CASE(DIVUW)
4643   NODE_NAME_CASE(REMUW)
4644   NODE_NAME_CASE(ROLW)
4645   NODE_NAME_CASE(RORW)
4646   NODE_NAME_CASE(FSLW)
4647   NODE_NAME_CASE(FSRW)
4648   NODE_NAME_CASE(FSL)
4649   NODE_NAME_CASE(FSR)
4650   NODE_NAME_CASE(FMV_H_X)
4651   NODE_NAME_CASE(FMV_X_ANYEXTH)
4652   NODE_NAME_CASE(FMV_W_X_RV64)
4653   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
4654   NODE_NAME_CASE(READ_CYCLE_WIDE)
4655   NODE_NAME_CASE(GREVI)
4656   NODE_NAME_CASE(GREVIW)
4657   NODE_NAME_CASE(GORCI)
4658   NODE_NAME_CASE(GORCIW)
4659   NODE_NAME_CASE(VMV_V_X_VL)
4660   NODE_NAME_CASE(VFMV_V_F_VL)
4661   NODE_NAME_CASE(VMV_X_S)
4662   NODE_NAME_CASE(SPLAT_VECTOR_I64)
4663   NODE_NAME_CASE(READ_VLENB)
4664   NODE_NAME_CASE(TRUNCATE_VECTOR)
4665   NODE_NAME_CASE(VLEFF)
4666   NODE_NAME_CASE(VLEFF_MASK)
4667   NODE_NAME_CASE(VSLIDEUP)
4668   NODE_NAME_CASE(VSLIDEDOWN)
4669   NODE_NAME_CASE(VID_VL)
4670   NODE_NAME_CASE(VFNCVT_ROD)
4671   NODE_NAME_CASE(VECREDUCE_ADD)
4672   NODE_NAME_CASE(VECREDUCE_UMAX)
4673   NODE_NAME_CASE(VECREDUCE_SMAX)
4674   NODE_NAME_CASE(VECREDUCE_UMIN)
4675   NODE_NAME_CASE(VECREDUCE_SMIN)
4676   NODE_NAME_CASE(VECREDUCE_AND)
4677   NODE_NAME_CASE(VECREDUCE_OR)
4678   NODE_NAME_CASE(VECREDUCE_XOR)
4679   NODE_NAME_CASE(VECREDUCE_FADD)
4680   NODE_NAME_CASE(VECREDUCE_SEQ_FADD)
4681   NODE_NAME_CASE(ADD_VL)
4682   NODE_NAME_CASE(AND_VL)
4683   NODE_NAME_CASE(MUL_VL)
4684   NODE_NAME_CASE(OR_VL)
4685   NODE_NAME_CASE(SDIV_VL)
4686   NODE_NAME_CASE(SHL_VL)
4687   NODE_NAME_CASE(SREM_VL)
4688   NODE_NAME_CASE(SRA_VL)
4689   NODE_NAME_CASE(SRL_VL)
4690   NODE_NAME_CASE(SUB_VL)
4691   NODE_NAME_CASE(UDIV_VL)
4692   NODE_NAME_CASE(UREM_VL)
4693   NODE_NAME_CASE(XOR_VL)
4694   NODE_NAME_CASE(FADD_VL)
4695   NODE_NAME_CASE(FSUB_VL)
4696   NODE_NAME_CASE(FMUL_VL)
4697   NODE_NAME_CASE(FDIV_VL)
4698   NODE_NAME_CASE(FNEG_VL)
4699   NODE_NAME_CASE(FMA_VL)
4700   NODE_NAME_CASE(VMCLR_VL)
4701   NODE_NAME_CASE(VMSET_VL)
4702   NODE_NAME_CASE(VRGATHER_VX_VL)
4703   NODE_NAME_CASE(VLE_VL)
4704   NODE_NAME_CASE(VSE_VL)
4705   }
4706   // clang-format on
4707   return nullptr;
4708 #undef NODE_NAME_CASE
4709 }
4710 
4711 /// getConstraintType - Given a constraint letter, return the type of
4712 /// constraint it is for this target.
4713 RISCVTargetLowering::ConstraintType
4714 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
4715   if (Constraint.size() == 1) {
4716     switch (Constraint[0]) {
4717     default:
4718       break;
4719     case 'f':
4720       return C_RegisterClass;
4721     case 'I':
4722     case 'J':
4723     case 'K':
4724       return C_Immediate;
4725     case 'A':
4726       return C_Memory;
4727     }
4728   }
4729   return TargetLowering::getConstraintType(Constraint);
4730 }
4731 
4732 std::pair<unsigned, const TargetRegisterClass *>
4733 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
4734                                                   StringRef Constraint,
4735                                                   MVT VT) const {
4736   // First, see if this is a constraint that directly corresponds to a
4737   // RISCV register class.
4738   if (Constraint.size() == 1) {
4739     switch (Constraint[0]) {
4740     case 'r':
4741       return std::make_pair(0U, &RISCV::GPRRegClass);
4742     case 'f':
4743       if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
4744         return std::make_pair(0U, &RISCV::FPR16RegClass);
4745       if (Subtarget.hasStdExtF() && VT == MVT::f32)
4746         return std::make_pair(0U, &RISCV::FPR32RegClass);
4747       if (Subtarget.hasStdExtD() && VT == MVT::f64)
4748         return std::make_pair(0U, &RISCV::FPR64RegClass);
4749       break;
4750     default:
4751       break;
4752     }
4753   }
4754 
4755   // Clang will correctly decode the usage of register name aliases into their
4756   // official names. However, other frontends like `rustc` do not. This allows
4757   // users of these frontends to use the ABI names for registers in LLVM-style
4758   // register constraints.
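  // For example, an IR-level constraint string such as "{a0}" is resolved to
  // X10 here instead of being rejected as an unknown register name.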
4759   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
4760                                .Case("{zero}", RISCV::X0)
4761                                .Case("{ra}", RISCV::X1)
4762                                .Case("{sp}", RISCV::X2)
4763                                .Case("{gp}", RISCV::X3)
4764                                .Case("{tp}", RISCV::X4)
4765                                .Case("{t0}", RISCV::X5)
4766                                .Case("{t1}", RISCV::X6)
4767                                .Case("{t2}", RISCV::X7)
4768                                .Cases("{s0}", "{fp}", RISCV::X8)
4769                                .Case("{s1}", RISCV::X9)
4770                                .Case("{a0}", RISCV::X10)
4771                                .Case("{a1}", RISCV::X11)
4772                                .Case("{a2}", RISCV::X12)
4773                                .Case("{a3}", RISCV::X13)
4774                                .Case("{a4}", RISCV::X14)
4775                                .Case("{a5}", RISCV::X15)
4776                                .Case("{a6}", RISCV::X16)
4777                                .Case("{a7}", RISCV::X17)
4778                                .Case("{s2}", RISCV::X18)
4779                                .Case("{s3}", RISCV::X19)
4780                                .Case("{s4}", RISCV::X20)
4781                                .Case("{s5}", RISCV::X21)
4782                                .Case("{s6}", RISCV::X22)
4783                                .Case("{s7}", RISCV::X23)
4784                                .Case("{s8}", RISCV::X24)
4785                                .Case("{s9}", RISCV::X25)
4786                                .Case("{s10}", RISCV::X26)
4787                                .Case("{s11}", RISCV::X27)
4788                                .Case("{t3}", RISCV::X28)
4789                                .Case("{t4}", RISCV::X29)
4790                                .Case("{t5}", RISCV::X30)
4791                                .Case("{t6}", RISCV::X31)
4792                                .Default(RISCV::NoRegister);
4793   if (XRegFromAlias != RISCV::NoRegister)
4794     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
4795 
4796   // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
4797   // TableGen record rather than the AsmName to choose registers for InlineAsm
4798   // constraints, and because we want to match those names to the widest
4799   // floating point register type available, manually select FP registers here.
4800   //
4801   // The second case is the ABI name of the register, so that frontends can also
4802   // use the ABI names in register constraint lists.
4803   if (Subtarget.hasStdExtF()) {
4804     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
4805                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
4806                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
4807                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
4808                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
4809                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
4810                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
4811                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
4812                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
4813                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
4814                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
4815                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
4816                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
4817                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
4818                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
4819                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
4820                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
4821                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
4822                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
4823                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
4824                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
4825                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
4826                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
4827                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
4828                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
4829                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
4830                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
4831                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
4832                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
4833                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
4834                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
4835                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
4836                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
4837                         .Default(RISCV::NoRegister);
4838     if (FReg != RISCV::NoRegister) {
4839       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
4840       if (Subtarget.hasStdExtD()) {
4841         unsigned RegNo = FReg - RISCV::F0_F;
4842         unsigned DReg = RISCV::F0_D + RegNo;
4843         return std::make_pair(DReg, &RISCV::FPR64RegClass);
4844       }
4845       return std::make_pair(FReg, &RISCV::FPR32RegClass);
4846     }
4847   }
4848 
4849   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
4850 }
4851 
4852 unsigned
4853 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
4854   // Currently only support length 1 constraints.
4855   if (ConstraintCode.size() == 1) {
4856     switch (ConstraintCode[0]) {
4857     case 'A':
4858       return InlineAsm::Constraint_A;
4859     default:
4860       break;
4861     }
4862   }
4863 
4864   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
4865 }
4866 
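// Lower immediate operands for the target-specific inline-asm constraints:
//   'I' - a 12-bit signed immediate, used in hypothetical user code such as
//         asm("addi %0, %1, %2" : "=r"(res) : "r"(a), "I"(42));
//   'J' - the integer constant zero
//   'K' - a 5-bit unsigned immediate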
4867 void RISCVTargetLowering::LowerAsmOperandForConstraint(
4868     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
4869     SelectionDAG &DAG) const {
4870   // Currently only support length 1 constraints.
4871   if (Constraint.length() == 1) {
4872     switch (Constraint[0]) {
4873     case 'I':
4874       // Validate & create a 12-bit signed immediate operand.
4875       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4876         uint64_t CVal = C->getSExtValue();
4877         if (isInt<12>(CVal))
4878           Ops.push_back(
4879               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
4880       }
4881       return;
4882     case 'J':
4883       // Validate & create an integer zero operand.
4884       if (auto *C = dyn_cast<ConstantSDNode>(Op))
4885         if (C->getZExtValue() == 0)
4886           Ops.push_back(
4887               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
4888       return;
4889     case 'K':
4890       // Validate & create a 5-bit unsigned immediate operand.
4891       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4892         uint64_t CVal = C->getZExtValue();
4893         if (isUInt<5>(CVal))
4894           Ops.push_back(
4895               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
4896       }
4897       return;
4898     default:
4899       break;
4900     }
4901   }
4902   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
4903 }
4904 
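// With the two fence hooks below, an acquire load roughly becomes
// "load; fence r,rw", a release store becomes "fence rw,w; store", and a
// seq_cst load additionally gets a leading "fence rw,rw", following the
// conservative fence-based mapping described in the RISC-V memory model
// appendix (assuming the usual lowering of IR fences to FENCE instructions).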
4905 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
4906                                                    Instruction *Inst,
4907                                                    AtomicOrdering Ord) const {
4908   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
4909     return Builder.CreateFence(Ord);
4910   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
4911     return Builder.CreateFence(AtomicOrdering::Release);
4912   return nullptr;
4913 }
4914 
4915 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
4916                                                     Instruction *Inst,
4917                                                     AtomicOrdering Ord) const {
4918   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
4919     return Builder.CreateFence(AtomicOrdering::Acquire);
4920   return nullptr;
4921 }
4922 
4923 TargetLowering::AtomicExpansionKind
4924 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
4925   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
4926   // point operations can't be used in an lr/sc sequence without breaking the
4927   // forward-progress guarantee.
4928   if (AI->isFloatingPointOperation())
4929     return AtomicExpansionKind::CmpXChg;
4930 
4931   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
4932   if (Size == 8 || Size == 16)
4933     return AtomicExpansionKind::MaskedIntrinsic;
4934   return AtomicExpansionKind::None;
4935 }
4936 
4937 static Intrinsic::ID
4938 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
4939   if (XLen == 32) {
4940     switch (BinOp) {
4941     default:
4942       llvm_unreachable("Unexpected AtomicRMW BinOp");
4943     case AtomicRMWInst::Xchg:
4944       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
4945     case AtomicRMWInst::Add:
4946       return Intrinsic::riscv_masked_atomicrmw_add_i32;
4947     case AtomicRMWInst::Sub:
4948       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
4949     case AtomicRMWInst::Nand:
4950       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
4951     case AtomicRMWInst::Max:
4952       return Intrinsic::riscv_masked_atomicrmw_max_i32;
4953     case AtomicRMWInst::Min:
4954       return Intrinsic::riscv_masked_atomicrmw_min_i32;
4955     case AtomicRMWInst::UMax:
4956       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
4957     case AtomicRMWInst::UMin:
4958       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
4959     }
4960   }
4961 
4962   if (XLen == 64) {
4963     switch (BinOp) {
4964     default:
4965       llvm_unreachable("Unexpected AtomicRMW BinOp");
4966     case AtomicRMWInst::Xchg:
4967       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
4968     case AtomicRMWInst::Add:
4969       return Intrinsic::riscv_masked_atomicrmw_add_i64;
4970     case AtomicRMWInst::Sub:
4971       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
4972     case AtomicRMWInst::Nand:
4973       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
4974     case AtomicRMWInst::Max:
4975       return Intrinsic::riscv_masked_atomicrmw_max_i64;
4976     case AtomicRMWInst::Min:
4977       return Intrinsic::riscv_masked_atomicrmw_min_i64;
4978     case AtomicRMWInst::UMax:
4979       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
4980     case AtomicRMWInst::UMin:
4981       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
4982     }
4983   }
4984 
4985   llvm_unreachable("Unexpected XLen\n");
4986 }
4987 
4988 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
4989     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
4990     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
4991   unsigned XLen = Subtarget.getXLen();
4992   Value *Ordering =
4993       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
4994   Type *Tys[] = {AlignedAddr->getType()};
4995   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
4996       AI->getModule(),
4997       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
4998 
4999   if (XLen == 64) {
5000     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
5001     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5002     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
5003   }
5004 
5005   Value *Result;
5006 
5007   // Must pass the shift amount needed to sign extend the loaded value prior
5008   // to performing a signed comparison for min/max. ShiftAmt is the number of
5009   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
5010   // is the number of bits to left+right shift the value in order to
5011   // sign-extend.
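  // For example, an i8 atomic min on RV32 whose byte sits at bit offset 8 of
  // the aligned word has ShiftAmt = 8, ValWidth = 8 and SextShamt =
  // 32 - 8 - 8 = 16: shifting left and then arithmetic-right by 16
  // sign-extends the field before the signed comparison.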
5012   if (AI->getOperation() == AtomicRMWInst::Min ||
5013       AI->getOperation() == AtomicRMWInst::Max) {
5014     const DataLayout &DL = AI->getModule()->getDataLayout();
5015     unsigned ValWidth =
5016         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
5017     Value *SextShamt =
5018         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
5019     Result = Builder.CreateCall(LrwOpScwLoop,
5020                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
5021   } else {
5022     Result =
5023         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
5024   }
5025 
5026   if (XLen == 64)
5027     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5028   return Result;
5029 }
5030 
5031 TargetLowering::AtomicExpansionKind
5032 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
5033     AtomicCmpXchgInst *CI) const {
5034   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
5035   if (Size == 8 || Size == 16)
5036     return AtomicExpansionKind::MaskedIntrinsic;
5037   return AtomicExpansionKind::None;
5038 }
5039 
5040 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
5041     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
5042     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
5043   unsigned XLen = Subtarget.getXLen();
5044   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
5045   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
5046   if (XLen == 64) {
5047     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
5048     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
5049     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5050     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
5051   }
5052   Type *Tys[] = {AlignedAddr->getType()};
5053   Function *MaskedCmpXchg =
5054       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
5055   Value *Result = Builder.CreateCall(
5056       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
5057   if (XLen == 64)
5058     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5059   return Result;
5060 }
5061 
5062 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
5063                                                      EVT VT) const {
5064   VT = VT.getScalarType();
5065 
5066   if (!VT.isSimple())
5067     return false;
5068 
5069   switch (VT.getSimpleVT().SimpleTy) {
5070   case MVT::f16:
5071     return Subtarget.hasStdExtZfh();
5072   case MVT::f32:
5073     return Subtarget.hasStdExtF();
5074   case MVT::f64:
5075     return Subtarget.hasStdExtD();
5076   default:
5077     break;
5078   }
5079 
5080   return false;
5081 }
5082 
5083 Register RISCVTargetLowering::getExceptionPointerRegister(
5084     const Constant *PersonalityFn) const {
5085   return RISCV::X10;
5086 }
5087 
5088 Register RISCVTargetLowering::getExceptionSelectorRegister(
5089     const Constant *PersonalityFn) const {
5090   return RISCV::X11;
5091 }
5092 
5093 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
5094   // Return false to suppress unnecessary extensions when a libcall argument
5095   // or return value has f32 type under the LP64 ABI.
5096   RISCVABI::ABI ABI = Subtarget.getTargetABI();
5097   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
5098     return false;
5099 
5100   return true;
5101 }
5102 
5103 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
5104   if (Subtarget.is64Bit() && Type == MVT::i32)
5105     return true;
5106 
5107   return IsSigned;
5108 }
5109 
5110 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5111                                                  SDValue C) const {
5112   // Check integral scalar types.
5113   if (VT.isScalarInteger()) {
5114     // Omit the optimization if the subtarget has the M extension and the data
5115     // size exceeds XLen.
5116     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
5117       return false;
5118     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
5119       // Break the MUL to a SLLI and an ADD/SUB.
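      // For example, x * 9 can be lowered as (x << 3) + x and x * 7 as
      // (x << 3) - x.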
5120       const APInt &Imm = ConstNode->getAPIntValue();
5121       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
5122           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
5123         return true;
5124       // Omit the following optimization if the subtarget has the M extension
5125       // and the data size is >= XLen.
5126       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
5127         return false;
5128       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
5129       // a pair of LUI/ADDI.
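      // For example (illustrative constant), x * 6144 with 6144 == 3 << 11 can
      // become ((x << 1) + x) << 11 rather than materialising 6144 with an
      // LUI/ADDI pair and multiplying.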
5130       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
5131         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
5132         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
5133             (1 - ImmS).isPowerOf2())
5134           return true;
5135       }
5136     }
5137   }
5138 
5139   return false;
5140 }
5141 
5142 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
5143   if (!Subtarget.useRVVForFixedLengthVectors())
5144     return false;
5145 
5146   if (!VT.isFixedLengthVector())
5147     return false;
5148 
5149   // Don't use RVV for vectors we cannot scalarize if required.
5150   switch (VT.getVectorElementType().SimpleTy) {
5151   default:
5152     return false;
5153   case MVT::i1:
5154   case MVT::i8:
5155   case MVT::i16:
5156   case MVT::i32:
5157   case MVT::i64:
5158     break;
5159   case MVT::f16:
5160     if (!Subtarget.hasStdExtZfh())
5161       return false;
5162     break;
5163   case MVT::f32:
5164     if (!Subtarget.hasStdExtF())
5165       return false;
5166     break;
5167   case MVT::f64:
5168     if (!Subtarget.hasStdExtD())
5169       return false;
5170     break;
5171   }
5172 
5173   unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
5174   // Don't use RVV for types that don't fit.
5175   if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
5176     return false;
5177 
5178   // TODO: Perhaps an artificial restriction, but worth having whilst getting
5179   // the base fixed length RVV support in place.
5180   if (!VT.isPow2VectorType())
5181     return false;
5182 
5183   return true;
5184 }
5185 
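// For scalable (RVV) vectors, report element-aligned accesses as supported and
// fast; any other type or alignment is reported as unsupported here.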
5186 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
5187     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
5188     bool *Fast) const {
5189   if (!VT.isScalableVector())
5190     return false;
5191 
5192   EVT ElemVT = VT.getVectorElementType();
5193   if (Alignment >= ElemVT.getStoreSize()) {
5194     if (Fast)
5195       *Fast = true;
5196     return true;
5197   }
5198 
5199   return false;
5200 }
5201 
5202 #define GET_REGISTER_MATCHER
5203 #include "RISCVGenAsmMatcher.inc"
5204 
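// Resolve a register name as used by the llvm.read_register and
// llvm.write_register intrinsics (e.g. for named register global variables).
// Both architectural names ("x8") and ABI names ("s0"/"fp") are accepted, but
// only registers that are reserved (e.g. via -ffixed-<reg>) may be accessed
// this way.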
5205 Register
5206 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
5207                                        const MachineFunction &MF) const {
5208   Register Reg = MatchRegisterAltName(RegName);
5209   if (Reg == RISCV::NoRegister)
5210     Reg = MatchRegisterName(RegName);
5211   if (Reg == RISCV::NoRegister)
5212     report_fatal_error(
5213         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
5214   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
5215   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
5216     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
5217                              StringRef(RegName) + "\"."));
5218   return Reg;
5219 }
5220 
5221 namespace llvm {
5222 namespace RISCVVIntrinsicsTable {
5223 
5224 #define GET_RISCVVIntrinsicsTable_IMPL
5225 #include "RISCVGenSearchableTables.inc"
5226 
5227 } // namespace RISCVVIntrinsicsTable
5228 
5229 namespace RISCVZvlssegTable {
5230 
5231 #define GET_RISCVZvlssegTable_IMPL
5232 #include "RISCVGenSearchableTables.inc"
5233 
5234 } // namespace RISCVZvlssegTable
5235 } // namespace llvm
5236