1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/IR/IntrinsicsRISCV.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "riscv-lower"
42 
43 STATISTIC(NumTailCalls, "Number of tail calls");
44 
45 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
46                                          const RISCVSubtarget &STI)
47     : TargetLowering(TM), Subtarget(STI) {
48 
49   if (Subtarget.isRV32E())
50     report_fatal_error("Codegen not yet implemented for RV32E");
51 
52   RISCVABI::ABI ABI = Subtarget.getTargetABI();
53   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
54 
55   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
56       !Subtarget.hasStdExtF()) {
57     errs() << "Hard-float 'f' ABI can't be used for a target that "
58               "doesn't support the F instruction set extension (ignoring "
59               "target-abi)\n";
60     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
61   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
62              !Subtarget.hasStdExtD()) {
63     errs() << "Hard-float 'd' ABI can't be used for a target that "
64               "doesn't support the D instruction set extension (ignoring "
65               "target-abi)\n";
66     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
67   }
68 
69   switch (ABI) {
70   default:
71     report_fatal_error("Don't know how to lower this ABI");
72   case RISCVABI::ABI_ILP32:
73   case RISCVABI::ABI_ILP32F:
74   case RISCVABI::ABI_ILP32D:
75   case RISCVABI::ABI_LP64:
76   case RISCVABI::ABI_LP64F:
77   case RISCVABI::ABI_LP64D:
78     break;
79   }
80 
81   MVT XLenVT = Subtarget.getXLenVT();
82 
83   // Set up the register classes.
84   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
85 
86   if (Subtarget.hasStdExtZfh())
87     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
88   if (Subtarget.hasStdExtF())
89     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
90   if (Subtarget.hasStdExtD())
91     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
92 
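  // Scalable vector value types supported with the V extension, grouped by
  // element type. These tables drive the RVV register class and operation
  // action setup below.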
93   static const MVT::SimpleValueType BoolVecVTs[] = {
94       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
95       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
96   static const MVT::SimpleValueType IntVecVTs[] = {
97       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
98       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
99       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
100       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
101       MVT::nxv4i64, MVT::nxv8i64};
102   static const MVT::SimpleValueType F16VecVTs[] = {
103       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
104       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
105   static const MVT::SimpleValueType F32VecVTs[] = {
106       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
107   static const MVT::SimpleValueType F64VecVTs[] = {
108       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
109 
110   if (Subtarget.hasStdExtV()) {
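    // Map a scalable vector type to a vector register class according to its
    // known-minimum size: types of at most 64 bits are placed in a single VR
    // register, while 128/256/512-bit types use the VRM2/VRM4/VRM8 register
    // groups respectively.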
111     auto addRegClassForRVV = [this](MVT VT) {
112       unsigned Size = VT.getSizeInBits().getKnownMinValue();
113       assert(Size <= 512 && isPowerOf2_32(Size));
114       const TargetRegisterClass *RC;
115       if (Size <= 64)
116         RC = &RISCV::VRRegClass;
117       else if (Size == 128)
118         RC = &RISCV::VRM2RegClass;
119       else if (Size == 256)
120         RC = &RISCV::VRM4RegClass;
121       else
122         RC = &RISCV::VRM8RegClass;
123 
124       addRegisterClass(VT, RC);
125     };
126 
127     for (MVT VT : BoolVecVTs)
128       addRegClassForRVV(VT);
129     for (MVT VT : IntVecVTs)
130       addRegClassForRVV(VT);
131 
132     if (Subtarget.hasStdExtZfh())
133       for (MVT VT : F16VecVTs)
134         addRegClassForRVV(VT);
135 
136     if (Subtarget.hasStdExtF())
137       for (MVT VT : F32VecVTs)
138         addRegClassForRVV(VT);
139 
140     if (Subtarget.hasStdExtD())
141       for (MVT VT : F64VecVTs)
142         addRegClassForRVV(VT);
143   }
144 
145   // Compute derived properties from the register classes.
146   computeRegisterProperties(STI.getRegisterInfo());
147 
148   setStackPointerRegisterToSaveRestore(RISCV::X2);
149 
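  // There is no native i1 load, so promote all flavours of extending load
  // from i1.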
150   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
151     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
152 
153   // TODO: add all necessary setOperationAction calls.
154   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
155 
156   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
157   setOperationAction(ISD::BR_CC, XLenVT, Expand);
158   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
159 
160   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
161   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
162 
163   setOperationAction(ISD::VASTART, MVT::Other, Custom);
164   setOperationAction(ISD::VAARG, MVT::Other, Expand);
165   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
166   setOperationAction(ISD::VAEND, MVT::Other, Expand);
167 
168   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
169   if (!Subtarget.hasStdExtZbb()) {
170     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
171     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
172   }
173 
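  // On RV64, i32 is not a legal type; custom-lower these 32-bit operations so
  // they can be selected to the sign-extending *W instruction forms (ADDW,
  // SLLW, etc.).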
174   if (Subtarget.is64Bit()) {
175     setOperationAction(ISD::ADD, MVT::i32, Custom);
176     setOperationAction(ISD::SUB, MVT::i32, Custom);
177     setOperationAction(ISD::SHL, MVT::i32, Custom);
178     setOperationAction(ISD::SRA, MVT::i32, Custom);
179     setOperationAction(ISD::SRL, MVT::i32, Custom);
180   }
181 
182   if (!Subtarget.hasStdExtM()) {
183     setOperationAction(ISD::MUL, XLenVT, Expand);
184     setOperationAction(ISD::MULHS, XLenVT, Expand);
185     setOperationAction(ISD::MULHU, XLenVT, Expand);
186     setOperationAction(ISD::SDIV, XLenVT, Expand);
187     setOperationAction(ISD::UDIV, XLenVT, Expand);
188     setOperationAction(ISD::SREM, XLenVT, Expand);
189     setOperationAction(ISD::UREM, XLenVT, Expand);
190   }
191 
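  // On RV64 with the M extension, custom-lower 32-bit multiplication and these
  // narrow division/remainder operations so they can be selected to the RV64M
  // *W instructions (MULW, DIVW, DIVUW, REMUW) once their operands have been
  // extended appropriately.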
192   if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
193     setOperationAction(ISD::MUL, MVT::i32, Custom);
194 
195     setOperationAction(ISD::SDIV, MVT::i8, Custom);
196     setOperationAction(ISD::UDIV, MVT::i8, Custom);
197     setOperationAction(ISD::UREM, MVT::i8, Custom);
198     setOperationAction(ISD::SDIV, MVT::i16, Custom);
199     setOperationAction(ISD::UDIV, MVT::i16, Custom);
200     setOperationAction(ISD::UREM, MVT::i16, Custom);
201     setOperationAction(ISD::SDIV, MVT::i32, Custom);
202     setOperationAction(ISD::UDIV, MVT::i32, Custom);
203     setOperationAction(ISD::UREM, MVT::i32, Custom);
204   }
205 
206   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
207   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
208   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
209   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
210 
211   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
212   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
213   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
214 
215   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
216     if (Subtarget.is64Bit()) {
217       setOperationAction(ISD::ROTL, MVT::i32, Custom);
218       setOperationAction(ISD::ROTR, MVT::i32, Custom);
219     }
220   } else {
221     setOperationAction(ISD::ROTL, XLenVT, Expand);
222     setOperationAction(ISD::ROTR, XLenVT, Expand);
223   }
224 
225   if (Subtarget.hasStdExtZbp()) {
226     // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
227     // more combining.
228     setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
229     setOperationAction(ISD::BSWAP, XLenVT, Custom);
230 
231     if (Subtarget.is64Bit()) {
232       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
233       setOperationAction(ISD::BSWAP, MVT::i32, Custom);
234     }
235   } else {
236     // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
237     // pattern match it directly in isel.
238     setOperationAction(ISD::BSWAP, XLenVT,
239                        Subtarget.hasStdExtZbb() ? Legal : Expand);
240   }
241 
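  // Zbb provides native integer min/max as well as the clz/ctz/cpop counting
  // instructions; without it the bit-counting operations must be expanded.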
242   if (Subtarget.hasStdExtZbb()) {
243     setOperationAction(ISD::SMIN, XLenVT, Legal);
244     setOperationAction(ISD::SMAX, XLenVT, Legal);
245     setOperationAction(ISD::UMIN, XLenVT, Legal);
246     setOperationAction(ISD::UMAX, XLenVT, Legal);
247   } else {
248     setOperationAction(ISD::CTTZ, XLenVT, Expand);
249     setOperationAction(ISD::CTLZ, XLenVT, Expand);
250     setOperationAction(ISD::CTPOP, XLenVT, Expand);
251   }
252 
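  // Zbt provides cmov and the fsl/fsr funnel shifts, so SELECT stays legal and
  // FSHL/FSHR are custom-lowered to FSL/FSR nodes; without it SELECT is
  // custom-lowered instead.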
253   if (Subtarget.hasStdExtZbt()) {
254     setOperationAction(ISD::FSHL, XLenVT, Custom);
255     setOperationAction(ISD::FSHR, XLenVT, Custom);
256     setOperationAction(ISD::SELECT, XLenVT, Legal);
257 
258     if (Subtarget.is64Bit()) {
259       setOperationAction(ISD::FSHL, MVT::i32, Custom);
260       setOperationAction(ISD::FSHR, MVT::i32, Custom);
261     }
262   } else {
263     setOperationAction(ISD::SELECT, XLenVT, Custom);
264   }
265 
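  // Floating-point condition codes with no direct RISC-V comparison
  // instruction; they are expanded in terms of the ordered EQ/LT/LE compares
  // that F/D/Zfh provide.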
266   ISD::CondCode FPCCToExpand[] = {
267       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
268       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
269       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
270 
271   ISD::NodeType FPOpToExpand[] = {
272       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
273       ISD::FP_TO_FP16};
274 
275   if (Subtarget.hasStdExtZfh())
276     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
277 
278   if (Subtarget.hasStdExtZfh()) {
279     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
280     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
281     for (auto CC : FPCCToExpand)
282       setCondCodeAction(CC, MVT::f16, Expand);
283     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
284     setOperationAction(ISD::SELECT, MVT::f16, Custom);
285     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
286     for (auto Op : FPOpToExpand)
287       setOperationAction(Op, MVT::f16, Expand);
288   }
289 
290   if (Subtarget.hasStdExtF()) {
291     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
292     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
293     for (auto CC : FPCCToExpand)
294       setCondCodeAction(CC, MVT::f32, Expand);
295     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
296     setOperationAction(ISD::SELECT, MVT::f32, Custom);
297     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
298     for (auto Op : FPOpToExpand)
299       setOperationAction(Op, MVT::f32, Expand);
300     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
301     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
302   }
303 
304   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
305     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
306 
307   if (Subtarget.hasStdExtD()) {
308     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
309     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
310     for (auto CC : FPCCToExpand)
311       setCondCodeAction(CC, MVT::f64, Expand);
312     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
313     setOperationAction(ISD::SELECT, MVT::f64, Custom);
314     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
315     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
316     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
317     for (auto Op : FPOpToExpand)
318       setOperationAction(Op, MVT::f64, Expand);
319     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
320     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
321   }
322 
323   if (Subtarget.is64Bit()) {
324     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
325     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
326     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
327     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
328   }
329 
330   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
331   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
332   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
333   setOperationAction(ISD::JumpTable, XLenVT, Custom);
334 
335   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
336 
337   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
338   // Unfortunately this can't be determined just from the ISA naming string.
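  // On RV64 a single rdcycle suffices, so READCYCLECOUNTER is legal; on RV32
  // the 64-bit result has to be assembled from the cycle/cycleh CSR pair,
  // hence the custom lowering.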
339   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
340                      Subtarget.is64Bit() ? Legal : Custom);
341 
342   setOperationAction(ISD::TRAP, MVT::Other, Legal);
343   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
344   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
345 
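  // With the A extension, atomics up to XLEN bits are supported natively and
  // 32 bits is the minimum natively supported compare-and-swap width. Without
  // it, all atomic operations are lowered to __atomic_* library calls.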
346   if (Subtarget.hasStdExtA()) {
347     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
348     setMinCmpXchgSizeInBits(32);
349   } else {
350     setMaxAtomicSizeInBitsSupported(0);
351   }
352 
353   setBooleanContents(ZeroOrOneBooleanContent);
354 
355   if (Subtarget.hasStdExtV()) {
356     setBooleanVectorContents(ZeroOrOneBooleanContent);
357 
358     setOperationAction(ISD::VSCALE, XLenVT, Custom);
359 
360     // RVV intrinsics may have illegal operands.
361     // We also need to custom legalize vmv.x.s.
362     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
363     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
364     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
365     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
366     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
367     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
368 
369     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
370 
371     if (Subtarget.is64Bit()) {
372       setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
373       setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
374     } else {
375       // We must custom-lower certain vXi64 operations on RV32 due to the vector
376       // element type being illegal.
377       setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
378       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
379       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
380 
381       setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
382       setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
383       setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
384       setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
385       setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
386       setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
387       setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
388       setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
389     }
390 
391     for (MVT VT : BoolVecVTs) {
392       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
393 
394       // Mask VTs are custom-expanded into a series of standard nodes
395       setOperationAction(ISD::TRUNCATE, VT, Custom);
396     }
397 
398     for (MVT VT : IntVecVTs) {
399       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
400 
401       setOperationAction(ISD::SMIN, VT, Legal);
402       setOperationAction(ISD::SMAX, VT, Legal);
403       setOperationAction(ISD::UMIN, VT, Legal);
404       setOperationAction(ISD::UMAX, VT, Legal);
405 
406       setOperationAction(ISD::ROTL, VT, Expand);
407       setOperationAction(ISD::ROTR, VT, Expand);
408 
409       // Custom-lower extensions and truncations from/to mask types.
410       setOperationAction(ISD::ANY_EXTEND, VT, Custom);
411       setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
412       setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
413 
414       // RVV has native int->float & float->int conversions where the
415       // element type sizes are within one power-of-two of each other. Any
416       // wider distances between type sizes have to be lowered as sequences
417       // which progressively narrow the gap in stages.
418       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
419       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
420       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
421       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
422 
423       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR"
424       // nodes which truncate by one power of two at a time.
425       setOperationAction(ISD::TRUNCATE, VT, Custom);
426 
427       // Custom-lower insert/extract operations to simplify patterns.
428       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
429       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
430 
431       // Custom-lower reduction operations to set up the corresponding custom
432       // nodes' operands.
433       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
434       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
435       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
436       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
437       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
438       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
439       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
440       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
441     }
442 
443     // Expand various CCs to best match the RVV ISA, which natively supports UNE
444     // but no other unordered comparisons, and supports all ordered comparisons
445     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
446     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
447     // and we pattern-match those back to the "original", swapping operands once
448     // more. This way we catch both operations and both "vf" and "fv" forms with
449     // fewer patterns.
450     ISD::CondCode VFPCCToExpand[] = {
451         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
452         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
453         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
454     };
455 
456     // Sets common operation actions on RVV floating-point vector types.
457     const auto SetCommonVFPActions = [&](MVT VT) {
458       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
459       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
460       // sizes are within one power-of-two of each other. Therefore conversions
461       // between vXf16 and vXf64 must be lowered as sequences which convert via
462       // vXf32.
463       setOperationAction(ISD::FP_ROUND, VT, Custom);
464       setOperationAction(ISD::FP_EXTEND, VT, Custom);
465       // Custom-lower insert/extract operations to simplify patterns.
466       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
467       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
468       // Expand various condition codes (explained above).
469       for (auto CC : VFPCCToExpand)
470         setCondCodeAction(CC, VT, Expand);
471     };
472 
473     if (Subtarget.hasStdExtZfh())
474       for (MVT VT : F16VecVTs)
475         SetCommonVFPActions(VT);
476 
477     if (Subtarget.hasStdExtF())
478       for (MVT VT : F32VecVTs)
479         SetCommonVFPActions(VT);
480 
481     if (Subtarget.hasStdExtD())
482       for (MVT VT : F64VecVTs)
483         SetCommonVFPActions(VT);
484   }
485 
486   // Function alignments.
487   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
488   setMinFunctionAlignment(FunctionAlignment);
489   setPrefFunctionAlignment(FunctionAlignment);
490 
491   setMinimumJumpTableEntries(5);
492 
493   // Jumps are expensive compared to simple ALU operations.
494   setJumpIsExpensive();
495 
496   // Any register can hold the result of a comparison.
497   setHasMultipleConditionRegisters();
498 
499   setTargetDAGCombine(ISD::SETCC);
500   if (Subtarget.hasStdExtZbp()) {
501     setTargetDAGCombine(ISD::OR);
502   }
503 }
504 
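// Compares on scalar types produce a pointer-sized (XLenVT) result, while
// scalable-vector compares produce an i1 mask vector with the same element
// count when RVV is available.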
505 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
506                                             LLVMContext &Context,
507                                             EVT VT) const {
508   if (!VT.isVector())
509     return getPointerTy(DL);
510   if (Subtarget.hasStdExtV() && VT.isScalableVector())
511     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
512   return VT.changeVectorElementTypeToInteger();
513 }
514 
515 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
516                                              const CallInst &I,
517                                              MachineFunction &MF,
518                                              unsigned Intrinsic) const {
519   switch (Intrinsic) {
520   default:
521     return false;
522   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
523   case Intrinsic::riscv_masked_atomicrmw_add_i32:
524   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
525   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
526   case Intrinsic::riscv_masked_atomicrmw_max_i32:
527   case Intrinsic::riscv_masked_atomicrmw_min_i32:
528   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
529   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
530   case Intrinsic::riscv_masked_cmpxchg_i32:
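    // Each of these intrinsics accesses a 32-bit word through its first
    // pointer argument; model that as an aligned, volatile 4-byte load+store.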
531     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
532     Info.opc = ISD::INTRINSIC_W_CHAIN;
533     Info.memVT = MVT::getVT(PtrTy->getElementType());
534     Info.ptrVal = I.getArgOperand(0);
535     Info.offset = 0;
536     Info.align = Align(4);
537     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
538                  MachineMemOperand::MOVolatile;
539     return true;
540   }
541 }
542 
543 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
544                                                 const AddrMode &AM, Type *Ty,
545                                                 unsigned AS,
546                                                 Instruction *I) const {
547   // No global is ever allowed as a base.
548   if (AM.BaseGV)
549     return false;
550 
551   // Require a 12-bit signed offset.
552   if (!isInt<12>(AM.BaseOffs))
553     return false;
554 
555   switch (AM.Scale) {
556   case 0: // "r+i" or just "i", depending on HasBaseReg.
557     break;
558   case 1:
559     if (!AM.HasBaseReg) // allow "r+i".
560       break;
561     return false; // disallow "r+r" or "r+r+i".
562   default:
563     return false;
564   }
565 
566   return true;
567 }
568 
569 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
570   return isInt<12>(Imm);
571 }
572 
573 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
574   return isInt<12>(Imm);
575 }
576 
577 // On RV32, 64-bit integers are split into their high and low parts and held
578 // in two different registers, so the trunc is free since the low register can
579 // just be used.
580 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
581   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
582     return false;
583   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
584   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
585   return (SrcBits == 64 && DestBits == 32);
586 }
587 
588 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
589   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
590       !SrcVT.isInteger() || !DstVT.isInteger())
591     return false;
592   unsigned SrcBits = SrcVT.getSizeInBits();
593   unsigned DestBits = DstVT.getSizeInBits();
594   return (SrcBits == 64 && DestBits == 32);
595 }
596 
597 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
598   // Zexts are free if they can be combined with a load.
599   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
600     EVT MemVT = LD->getMemoryVT();
601     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
602          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
603         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
604          LD->getExtensionType() == ISD::ZEXTLOAD))
605       return true;
606   }
607 
608   return TargetLowering::isZExtFree(Val, VT2);
609 }
610 
611 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
612   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
613 }
614 
615 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
616   return Subtarget.hasStdExtZbb();
617 }
618 
619 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
620   return Subtarget.hasStdExtZbb();
621 }
622 
623 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
624                                        bool ForCodeSize) const {
625   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
626     return false;
627   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
628     return false;
629   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
630     return false;
631   if (Imm.isNegZero())
632     return false;
633   return Imm.isZero();
634 }
635 
636 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
637   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
638          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
639          (VT == MVT::f64 && Subtarget.hasStdExtD());
640 }
641 
642 // Changes the condition code and swaps operands if necessary, so the SetCC
643 // operation matches one of the comparisons supported directly in the RISC-V
644 // ISA.
645 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
646   switch (CC) {
647   default:
648     break;
649   case ISD::SETGT:
650   case ISD::SETLE:
651   case ISD::SETUGT:
652   case ISD::SETULE:
653     CC = ISD::getSetCCSwappedOperands(CC);
654     std::swap(LHS, RHS);
655     break;
656   }
657 }
658 
659 // Return the RISC-V branch opcode that matches the given DAG integer
660 // condition code. The CondCode must be one of those supported by the RISC-V
661 // ISA (see normaliseSetCC).
662 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
663   switch (CC) {
664   default:
665     llvm_unreachable("Unsupported CondCode");
666   case ISD::SETEQ:
667     return RISCV::BEQ;
668   case ISD::SETNE:
669     return RISCV::BNE;
670   case ISD::SETLT:
671     return RISCV::BLT;
672   case ISD::SETGE:
673     return RISCV::BGE;
674   case ISD::SETULT:
675     return RISCV::BLTU;
676   case ISD::SETUGE:
677     return RISCV::BGEU;
678   }
679 }
680 
681 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
682                                             SelectionDAG &DAG) const {
683   switch (Op.getOpcode()) {
684   default:
685     report_fatal_error("unimplemented operand");
686   case ISD::GlobalAddress:
687     return lowerGlobalAddress(Op, DAG);
688   case ISD::BlockAddress:
689     return lowerBlockAddress(Op, DAG);
690   case ISD::ConstantPool:
691     return lowerConstantPool(Op, DAG);
692   case ISD::JumpTable:
693     return lowerJumpTable(Op, DAG);
694   case ISD::GlobalTLSAddress:
695     return lowerGlobalTLSAddress(Op, DAG);
696   case ISD::SELECT:
697     return lowerSELECT(Op, DAG);
698   case ISD::VASTART:
699     return lowerVASTART(Op, DAG);
700   case ISD::FRAMEADDR:
701     return lowerFRAMEADDR(Op, DAG);
702   case ISD::RETURNADDR:
703     return lowerRETURNADDR(Op, DAG);
704   case ISD::SHL_PARTS:
705     return lowerShiftLeftParts(Op, DAG);
706   case ISD::SRA_PARTS:
707     return lowerShiftRightParts(Op, DAG, true);
708   case ISD::SRL_PARTS:
709     return lowerShiftRightParts(Op, DAG, false);
710   case ISD::BITCAST: {
711     assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
712             Subtarget.hasStdExtZfh()) &&
713            "Unexpected custom legalisation");
714     SDLoc DL(Op);
715     SDValue Op0 = Op.getOperand(0);
716     if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
717       if (Op0.getValueType() != MVT::i16)
718         return SDValue();
719       SDValue NewOp0 =
720           DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
721       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
722       return FPConv;
723     } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
724                Subtarget.hasStdExtF()) {
725       if (Op0.getValueType() != MVT::i32)
726         return SDValue();
727       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
728       SDValue FPConv =
729           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
730       return FPConv;
731     }
732     return SDValue();
733   }
734   case ISD::INTRINSIC_WO_CHAIN:
735     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
736   case ISD::INTRINSIC_W_CHAIN:
737     return LowerINTRINSIC_W_CHAIN(Op, DAG);
738   case ISD::BSWAP:
739   case ISD::BITREVERSE: {
740     // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
741     assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
742     MVT VT = Op.getSimpleValueType();
743     SDLoc DL(Op);
744     // Start with the maximum immediate value, which is the bitwidth - 1.
745     unsigned Imm = VT.getSizeInBits() - 1;
746     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
747     if (Op.getOpcode() == ISD::BSWAP)
748       Imm &= ~0x7U;
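    // For example, with a 64-bit type this gives GREVI immediates of 63 for
    // BITREVERSE and 56 (0b111000) for BSWAP, i.e. a reversal at byte
    // granularity and above.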
749     return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
750                        DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
751   }
752   case ISD::FSHL:
753   case ISD::FSHR: {
754     MVT VT = Op.getSimpleValueType();
755     assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
756     SDLoc DL(Op);
757     // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
758     // use log2(XLen) bits. Mask the shift amount accordingly.
759     unsigned ShAmtWidth = Subtarget.getXLen() - 1;
760     SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
761                                 DAG.getConstant(ShAmtWidth, DL, VT));
762     unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
763     return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
764   }
765   case ISD::TRUNCATE: {
766     SDLoc DL(Op);
767     EVT VT = Op.getValueType();
768     // Only custom-lower vector truncates
769     if (!VT.isVector())
770       return Op;
771 
772     // Truncates to mask types are handled differently
773     if (VT.getVectorElementType() == MVT::i1)
774       return lowerVectorMaskTrunc(Op, DAG);
775 
776     // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
777     // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which
778     // truncate by one power of two at a time.
779     EVT DstEltVT = VT.getVectorElementType();
780 
781     SDValue Src = Op.getOperand(0);
782     EVT SrcVT = Src.getValueType();
783     EVT SrcEltVT = SrcVT.getVectorElementType();
784 
785     assert(DstEltVT.bitsLT(SrcEltVT) &&
786            isPowerOf2_64(DstEltVT.getSizeInBits()) &&
787            isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
788            "Unexpected vector truncate lowering");
789 
790     SDValue Result = Src;
791     LLVMContext &Context = *DAG.getContext();
792     const ElementCount Count = SrcVT.getVectorElementCount();
793     do {
794       SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2);
795       EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
796       Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result);
797     } while (SrcEltVT != DstEltVT);
798 
799     return Result;
800   }
801   case ISD::ANY_EXTEND:
802   case ISD::ZERO_EXTEND:
803     return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
804   case ISD::SIGN_EXTEND:
805     return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
806   case ISD::SPLAT_VECTOR:
807     return lowerSPLATVECTOR(Op, DAG);
808   case ISD::INSERT_VECTOR_ELT:
809     return lowerINSERT_VECTOR_ELT(Op, DAG);
810   case ISD::EXTRACT_VECTOR_ELT:
811     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
812   case ISD::VSCALE: {
813     MVT VT = Op.getSimpleValueType();
814     SDLoc DL(Op);
815     SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
816     // We define our scalable vector types for LMUL=1 to use a 64-bit known
817     // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we compute
818     // vscale as VLENB / 8.
819     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
820                                  DAG.getConstant(3, DL, VT));
821     return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
822   }
823   case ISD::FP_EXTEND: {
824     // RVV can only do fp_extend to types double the size as the source. We
825     // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
826     // via f32.
827     MVT VT = Op.getSimpleValueType();
828     MVT SrcVT = Op.getOperand(0).getSimpleValueType();
829     // We only need to close the gap between vXf16->vXf64.
830     if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
831         SrcVT.getVectorElementType() != MVT::f16)
832       return Op;
833     SDLoc DL(Op);
834     MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
835     SDValue IntermediateRound =
836         DAG.getFPExtendOrRound(Op.getOperand(0), DL, InterVT);
837     return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
838   }
839   case ISD::FP_ROUND: {
840     // RVV can only do fp_round to types half the size as the source. We
841     // custom-lower f64->f16 rounds via RVV's round-to-odd float
842     // conversion instruction.
843     MVT VT = Op.getSimpleValueType();
844     MVT SrcVT = Op.getOperand(0).getSimpleValueType();
845     // We only need to close the gap between vXf64<->vXf16.
846     if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
847         SrcVT.getVectorElementType() != MVT::f64)
848       return Op;
849     SDLoc DL(Op);
850     MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
851     SDValue IntermediateRound =
852         DAG.getNode(RISCVISD::VFNCVT_ROD, DL, InterVT, Op.getOperand(0));
853     return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
854   }
855   case ISD::FP_TO_SINT:
856   case ISD::FP_TO_UINT:
857   case ISD::SINT_TO_FP:
858   case ISD::UINT_TO_FP: {
859     // RVV can only do fp<->int conversions to types half/double the size as
860     // the source. We custom-lower any conversions that do two hops into
861     // sequences.
862     MVT VT = Op.getSimpleValueType();
863     if (!VT.isVector())
864       return Op;
865     SDLoc DL(Op);
866     SDValue Src = Op.getOperand(0);
867     MVT EltVT = VT.getVectorElementType();
868     MVT SrcEltVT = Src.getSimpleValueType().getVectorElementType();
869     unsigned EltSize = EltVT.getSizeInBits();
870     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
871     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
872            "Unexpected vector element types");
873     bool IsInt2FP = SrcEltVT.isInteger();
874     // Widening conversions
875     if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
876       if (IsInt2FP) {
877         // Do a regular integer sign/zero extension then convert to float.
878         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
879                                       VT.getVectorElementCount());
880         unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
881                                  ? ISD::ZERO_EXTEND
882                                  : ISD::SIGN_EXTEND;
883         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
884         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
885       }
886       // FP2Int
887       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
888       // Do one doubling fp_extend then complete the operation by converting
889       // to int.
890       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
891       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
892       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
893     }
894 
895     // Narrowing conversions
896     if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
897       if (IsInt2FP) {
898         // One narrowing int_to_fp, then an fp_round.
899         assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
900         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
901         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
902         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
903       }
904       // FP2Int
905       // One narrowing fp_to_int, then truncate the integer. If the float isn't
906       // representable by the integer, the result is poison.
907       MVT IVecVT =
908           MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
909                            VT.getVectorElementCount());
910       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
911       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
912     }
913 
914     return Op;
915   }
916   case ISD::VECREDUCE_ADD:
917   case ISD::VECREDUCE_UMAX:
918   case ISD::VECREDUCE_SMAX:
919   case ISD::VECREDUCE_UMIN:
920   case ISD::VECREDUCE_SMIN:
921   case ISD::VECREDUCE_AND:
922   case ISD::VECREDUCE_OR:
923   case ISD::VECREDUCE_XOR:
924     return lowerVECREDUCE(Op, DAG);
925   }
926 }
927 
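// Helpers that create the target-specific address node for each kind of
// symbol (global address, block address, constant pool entry or jump table)
// with the given operand flags.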
928 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
929                              SelectionDAG &DAG, unsigned Flags) {
930   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
931 }
932 
933 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
934                              SelectionDAG &DAG, unsigned Flags) {
935   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
936                                    Flags);
937 }
938 
939 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
940                              SelectionDAG &DAG, unsigned Flags) {
941   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
942                                    N->getOffset(), Flags);
943 }
944 
945 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
946                              SelectionDAG &DAG, unsigned Flags) {
947   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
948 }
949 
950 template <class NodeTy>
951 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
952                                      bool IsLocal) const {
953   SDLoc DL(N);
954   EVT Ty = getPointerTy(DAG.getDataLayout());
955 
956   if (isPositionIndependent()) {
957     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
958     if (IsLocal)
959       // Use PC-relative addressing to access the symbol. This generates the
960       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
961       // %pcrel_lo(auipc)).
962       return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
963 
964     // Use PC-relative addressing to access the GOT for this symbol, then load
965     // the address from the GOT. This generates the pattern (PseudoLA sym),
966     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
967     return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
968   }
969 
970   switch (getTargetMachine().getCodeModel()) {
971   default:
972     report_fatal_error("Unsupported code model for lowering");
973   case CodeModel::Small: {
974     // Generate a sequence for accessing addresses within the first 2 GiB of
975     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
976     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
977     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
978     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
979     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
980   }
981   case CodeModel::Medium: {
982     // Generate a sequence for accessing addresses within any 2GiB range within
983     // the address space. This generates the pattern (PseudoLLA sym), which
984     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
985     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
986     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
987   }
988   }
989 }
990 
991 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
992                                                 SelectionDAG &DAG) const {
993   SDLoc DL(Op);
994   EVT Ty = Op.getValueType();
995   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
996   int64_t Offset = N->getOffset();
997   MVT XLenVT = Subtarget.getXLenVT();
998 
999   const GlobalValue *GV = N->getGlobal();
1000   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
1001   SDValue Addr = getAddr(N, DAG, IsLocal);
1002 
1003   // In order to maximise the opportunity for common subexpression elimination,
1004   // emit a separate ADD node for the global address offset instead of folding
1005   // it in the global address node. Later peephole optimisations may choose to
1006   // fold it back in when profitable.
1007   if (Offset != 0)
1008     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
1009                        DAG.getConstant(Offset, DL, XLenVT));
1010   return Addr;
1011 }
1012 
1013 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
1014                                                SelectionDAG &DAG) const {
1015   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
1016 
1017   return getAddr(N, DAG);
1018 }
1019 
1020 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
1021                                                SelectionDAG &DAG) const {
1022   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
1023 
1024   return getAddr(N, DAG);
1025 }
1026 
1027 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
1028                                             SelectionDAG &DAG) const {
1029   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
1030 
1031   return getAddr(N, DAG);
1032 }
1033 
1034 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1035                                               SelectionDAG &DAG,
1036                                               bool UseGOT) const {
1037   SDLoc DL(N);
1038   EVT Ty = getPointerTy(DAG.getDataLayout());
1039   const GlobalValue *GV = N->getGlobal();
1040   MVT XLenVT = Subtarget.getXLenVT();
1041 
1042   if (UseGOT) {
1043     // Use PC-relative addressing to access the GOT for this TLS symbol, then
1044     // load the address from the GOT and add the thread pointer. This generates
1045     // the pattern (PseudoLA_TLS_IE sym), which expands to
1046     // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
1047     SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1048     SDValue Load =
1049         SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
1050 
1051     // Add the thread pointer.
1052     SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
1053     return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
1054   }
1055 
1056   // Generate a sequence for accessing the address relative to the thread
1057   // pointer, with the appropriate adjustment for the thread pointer offset.
1058   // This generates the pattern
1059   // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
1060   SDValue AddrHi =
1061       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
1062   SDValue AddrAdd =
1063       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
1064   SDValue AddrLo =
1065       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
1066 
1067   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
1068   SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
1069   SDValue MNAdd = SDValue(
1070       DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
1071       0);
1072   return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
1073 }
1074 
1075 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1076                                                SelectionDAG &DAG) const {
1077   SDLoc DL(N);
1078   EVT Ty = getPointerTy(DAG.getDataLayout());
1079   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1080   const GlobalValue *GV = N->getGlobal();
1081 
1082   // Use a PC-relative addressing mode to access the global dynamic GOT address.
1083   // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
1084   // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
1085   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1086   SDValue Load =
1087       SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
1088 
1089   // Prepare argument list to generate call.
1090   ArgListTy Args;
1091   ArgListEntry Entry;
1092   Entry.Node = Load;
1093   Entry.Ty = CallTy;
1094   Args.push_back(Entry);
1095 
1096   // Setup call to __tls_get_addr.
1097   TargetLowering::CallLoweringInfo CLI(DAG);
1098   CLI.setDebugLoc(DL)
1099       .setChain(DAG.getEntryNode())
1100       .setLibCallee(CallingConv::C, CallTy,
1101                     DAG.getExternalSymbol("__tls_get_addr", Ty),
1102                     std::move(Args));
1103 
1104   return LowerCallTo(CLI).first;
1105 }
1106 
1107 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1108                                                    SelectionDAG &DAG) const {
1109   SDLoc DL(Op);
1110   EVT Ty = Op.getValueType();
1111   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1112   int64_t Offset = N->getOffset();
1113   MVT XLenVT = Subtarget.getXLenVT();
1114 
1115   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
1116 
1117   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1118       CallingConv::GHC)
1119     report_fatal_error("In GHC calling convention TLS is not supported");
1120 
1121   SDValue Addr;
1122   switch (Model) {
1123   case TLSModel::LocalExec:
1124     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
1125     break;
1126   case TLSModel::InitialExec:
1127     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
1128     break;
1129   case TLSModel::LocalDynamic:
1130   case TLSModel::GeneralDynamic:
1131     Addr = getDynamicTLSAddr(N, DAG);
1132     break;
1133   }
1134 
1135   // In order to maximise the opportunity for common subexpression elimination,
1136   // emit a separate ADD node for the global address offset instead of folding
1137   // it in the global address node. Later peephole optimisations may choose to
1138   // fold it back in when profitable.
1139   if (Offset != 0)
1140     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
1141                        DAG.getConstant(Offset, DL, XLenVT));
1142   return Addr;
1143 }
1144 
1145 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
1146   SDValue CondV = Op.getOperand(0);
1147   SDValue TrueV = Op.getOperand(1);
1148   SDValue FalseV = Op.getOperand(2);
1149   SDLoc DL(Op);
1150   MVT XLenVT = Subtarget.getXLenVT();
1151 
1152   // If the result type is XLenVT and CondV is the output of a SETCC node
1153   // which also operated on XLenVT inputs, then merge the SETCC node into the
1154   // lowered RISCVISD::SELECT_CC to take advantage of the integer
1155   // compare+branch instructions. i.e.:
1156   // (select (setcc lhs, rhs, cc), truev, falsev)
1157   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
1158   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
1159       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
1160     SDValue LHS = CondV.getOperand(0);
1161     SDValue RHS = CondV.getOperand(1);
1162     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
1163     ISD::CondCode CCVal = CC->get();
1164 
1165     normaliseSetCC(LHS, RHS, CCVal);
1166 
1167     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
1168     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1169     return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
1170   }
1171 
1172   // Otherwise:
1173   // (select condv, truev, falsev)
1174   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
1175   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
1176   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
1177 
1178   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1179 
1180   return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
1181 }
1182 
1183 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1184   MachineFunction &MF = DAG.getMachineFunction();
1185   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
1186 
1187   SDLoc DL(Op);
1188   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1189                                  getPointerTy(MF.getDataLayout()));
1190 
1191   // vastart just stores the address of the VarArgsFrameIndex slot into the
1192   // memory location argument.
1193   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1194   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1195                       MachinePointerInfo(SV));
1196 }
1197 
1198 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
1199                                             SelectionDAG &DAG) const {
1200   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
1201   MachineFunction &MF = DAG.getMachineFunction();
1202   MachineFrameInfo &MFI = MF.getFrameInfo();
1203   MFI.setFrameAddressIsTaken(true);
1204   Register FrameReg = RI.getFrameRegister(MF);
1205   int XLenInBytes = Subtarget.getXLen() / 8;
1206 
1207   EVT VT = Op.getValueType();
1208   SDLoc DL(Op);
1209   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1210   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1211   while (Depth--) {
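    // The previous frame pointer is assumed to be spilled at
    // FP - 2*XLenInBytes (just below the saved return address), so step up one
    // frame per iteration by loading from that slot.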
1212     int Offset = -(XLenInBytes * 2);
1213     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1214                               DAG.getIntPtrConstant(Offset, DL));
1215     FrameAddr =
1216         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1217   }
1218   return FrameAddr;
1219 }
1220 
1221 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
1222                                              SelectionDAG &DAG) const {
1223   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
1224   MachineFunction &MF = DAG.getMachineFunction();
1225   MachineFrameInfo &MFI = MF.getFrameInfo();
1226   MFI.setReturnAddressIsTaken(true);
1227   MVT XLenVT = Subtarget.getXLenVT();
1228   int XLenInBytes = Subtarget.getXLen() / 8;
1229 
1230   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1231     return SDValue();
1232 
1233   EVT VT = Op.getValueType();
1234   SDLoc DL(Op);
1235   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1236   if (Depth) {
1237     int Off = -XLenInBytes;
1238     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
1239     SDValue Offset = DAG.getConstant(Off, DL, VT);
1240     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1241                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1242                        MachinePointerInfo());
1243   }
1244 
1245   // Return the value of the return address register, marking it an implicit
1246   // live-in.
1247   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
1248   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
1249 }
1250 
1251 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
1252                                                  SelectionDAG &DAG) const {
1253   SDLoc DL(Op);
1254   SDValue Lo = Op.getOperand(0);
1255   SDValue Hi = Op.getOperand(1);
1256   SDValue Shamt = Op.getOperand(2);
1257   EVT VT = Lo.getValueType();
1258 
1259   // if Shamt-XLEN < 0: // Shamt < XLEN
1260   //   Lo = Lo << Shamt
1261   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
1262   // else:
1263   //   Lo = 0
1264   //   Hi = Lo << (Shamt-XLEN)
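  //
  // The (Lo >>u 1) >>u (XLEN-1 - Shamt) form computes Lo >>u (XLEN - Shamt)
  // without ever using a shift amount equal to XLEN, which would be undefined
  // when Shamt is 0.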
1265 
1266   SDValue Zero = DAG.getConstant(0, DL, VT);
1267   SDValue One = DAG.getConstant(1, DL, VT);
1268   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
1269   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
1270   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
1271   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
1272 
1273   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
1274   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
1275   SDValue ShiftRightLo =
1276       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
1277   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
1278   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
1279   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
1280 
1281   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
1282 
1283   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
1284   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1285 
1286   SDValue Parts[2] = {Lo, Hi};
1287   return DAG.getMergeValues(Parts, DL);
1288 }
1289 
1290 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
1291                                                   bool IsSRA) const {
1292   SDLoc DL(Op);
1293   SDValue Lo = Op.getOperand(0);
1294   SDValue Hi = Op.getOperand(1);
1295   SDValue Shamt = Op.getOperand(2);
1296   EVT VT = Lo.getValueType();
1297 
1298   // SRA expansion:
1299   //   if Shamt-XLEN < 0: // Shamt < XLEN
1300   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
1301   //     Hi = Hi >>s Shamt
1302   //   else:
1303   //     Lo = Hi >>s (Shamt-XLEN);
1304   //     Hi = Hi >>s (XLEN-1)
1305   //
1306   // SRL expansion:
1307   //   if Shamt-XLEN < 0: // Shamt < XLEN
1308   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
1309   //     Hi = Hi >>u Shamt
1310   //   else:
1311   //     Lo = Hi >>u (Shamt-XLEN);
1312   //     Hi = 0;
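  //
  // As in lowerShiftLeftParts, (Hi << 1) << (XLEN-1 - Shamt) computes
  // Hi << (XLEN - Shamt) without requiring a shift amount equal to XLEN.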
1313 
1314   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
1315 
1316   SDValue Zero = DAG.getConstant(0, DL, VT);
1317   SDValue One = DAG.getConstant(1, DL, VT);
1318   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
1319   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
1320   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
1321   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
1322 
1323   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
1324   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
1325   SDValue ShiftLeftHi =
1326       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
1327   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
1328   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
1329   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
1330   SDValue HiFalse =
1331       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
1332 
1333   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
1334 
1335   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
1336   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1337 
1338   SDValue Parts[2] = {Lo, Hi};
1339   return DAG.getMergeValues(Parts, DL);
1340 }
1341 
1342 // Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is
1343 // illegal (currently only vXi64 RV32).
1344 // FIXME: We could also catch non-constant sign-extended i32 values and lower
1345 // them to SPLAT_VECTOR_I64
1346 SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op,
1347                                               SelectionDAG &DAG) const {
1348   SDLoc DL(Op);
1349   EVT VecVT = Op.getValueType();
1350   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
1351          "Unexpected SPLAT_VECTOR lowering");
1352   SDValue SplatVal = Op.getOperand(0);
1353 
1354   // If we can prove that the value is a sign-extended 32-bit value, lower this
1355   // as a custom node in order to try and match RVV vector/scalar instructions.
1356   if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) {
1357     if (isInt<32>(CVal->getSExtValue()))
1358       return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
1359                          DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32));
1360   }
1361 
1362   if (SplatVal.getOpcode() == ISD::SIGN_EXTEND &&
1363       SplatVal.getOperand(0).getValueType() == MVT::i32) {
1364     return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
1365                        SplatVal.getOperand(0));
1366   }
1367 
1368   // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
1369   // to accidentally sign-extend the 32-bit halves to the e64 SEW:
1370   // vmv.v.x vX, hi
1371   // vsll.vx vX, vX, /*32*/
1372   // vmv.v.x vY, lo
1373   // vsll.vx vY, vY, /*32*/
1374   // vsrl.vx vY, vY, /*32*/
1375   // vor.vv vX, vX, vY
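       //
       // For example, splatting the 64-bit value 0x0000000180000001 (hi = 1,
       // lo = 0x80000001): splatting lo sign-extends it to 0xFFFFFFFF80000001
       // in each element, and the shift-left/shift-right-by-32 pair clears the
       // unwanted upper bits, giving 0x0000000080000001; splatting hi and
       // shifting left by 32 gives 0x0000000100000000; ORing the two produces
       // the desired element value.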
1376   SDValue One = DAG.getConstant(1, DL, MVT::i32);
1377   SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
1378   SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
1379   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero);
1380   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One);
1381 
1382   Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
1383   Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
1384   Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);
1385 
1386   if (isNullConstant(Hi))
1387     return Lo;
1388 
1389   Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
1390   Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);
1391 
1392   return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
1393 }
1394 
1395 // Custom-lower extensions from mask vectors by using a vselect either with 1
1396 // for zero/any-extension or -1 for sign-extension:
1397 //   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
1398 // Note that any-extension is lowered identically to zero-extension.
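     // For example, zero-extending the mask <1,0,1> to vXi32 yields
     // (vselect <1,0,1>, splat(1), splat(0)) = <1,0,1>, while sign-extension
     // uses ExtTrueVal = -1 to yield <-1,0,-1>.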
1399 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
1400                                                 int64_t ExtTrueVal) const {
1401   SDLoc DL(Op);
1402   EVT VecVT = Op.getValueType();
1403   SDValue Src = Op.getOperand(0);
1404   // Only custom-lower extensions from mask types
1405   if (!Src.getValueType().isVector() ||
1406       Src.getValueType().getVectorElementType() != MVT::i1)
1407     return Op;
1408 
1409   // Be careful not to introduce illegal scalar types at this stage, and be
1410   // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
1411   // illegal and must be expanded. Since we know that the constants are
1412   // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
1413   bool IsRV32E64 =
1414       !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
1415   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1416   SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, Subtarget.getXLenVT());
1417 
1418   if (!IsRV32E64) {
1419     SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
1420     SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
1421   } else {
1422     SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
1423     SplatTrueVal =
1424         DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
1425   }
1426 
1427   return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
1428 }
1429 
1430 // Custom-lower truncations from vectors to mask vectors by using a mask and a
1431 // setcc operation:
1432 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
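     // For example, truncating vXi8 <2,3,0,255> to vXi1 computes
     // (and <2,3,0,255>, splat(1)) = <0,1,0,1>, and the setcc-with-zero then
     // produces the mask <0,1,0,1>, i.e. the low bit of each element.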
1433 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
1434                                                   SelectionDAG &DAG) const {
1435   SDLoc DL(Op);
1436   EVT MaskVT = Op.getValueType();
1437   // Only expect to custom-lower truncations to mask types
1438   assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
1439          "Unexpected type for vector mask lowering");
1440   SDValue Src = Op.getOperand(0);
1441   EVT VecVT = Src.getValueType();
1442 
1443   // Be careful not to introduce illegal scalar types at this stage, and be
1444   // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
1445   // illegal and must be expanded. Since we know that the constants are
1446   // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
1447   bool IsRV32E64 =
1448       !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
1449   SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
1450   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1451 
1452   if (!IsRV32E64) {
1453     SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne);
1454     SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
1455   } else {
1456     SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne);
1457     SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
1458   }
1459 
1460   SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
1461 
1462   return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
1463 }
1464 
1465 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1466                                                     SelectionDAG &DAG) const {
1467   SDLoc DL(Op);
1468   EVT VecVT = Op.getValueType();
1469   SDValue Vec = Op.getOperand(0);
1470   SDValue Val = Op.getOperand(1);
1471   SDValue Idx = Op.getOperand(2);
1472 
1473   // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is
1474   // first slid down into position, the value is inserted into the first
1475   // position, and the vector is slid back up. We do this to simplify patterns.
1476   //   (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx),
1477   if (Subtarget.is64Bit() || VecVT.getVectorElementType() != MVT::i64) {
1478     if (isNullConstant(Idx))
1479       return Op;
1480     SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
1481                                     DAG.getUNDEF(VecVT), Vec, Idx);
1482     SDValue InsertElt0 =
1483         DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val,
1484                     DAG.getConstant(0, DL, Subtarget.getXLenVT()));
1485 
1486     return DAG.getNode(RISCVISD::VSLIDEUP, DL, VecVT, Vec, InsertElt0, Idx);
1487   }
1488 
1489   // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type
1490   // is illegal (currently only vXi64 RV32).
1491   // Since there is no easy way of getting a single element into a vector when
1492   // XLEN<SEW, we lower the operation to the following sequence:
1493   //   splat      vVal, rVal
1494   //   vid.v      vVid
1495   //   vmseq.vx   mMask, vVid, rIdx
1496   //   vmerge.vvm vDest, vSrc, vVal, mMask
1497   // This essentially merges the original vector with the inserted element by
1498   // using a mask whose only set bit is that corresponding to the insert
1499   // index.
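       // For example, with Idx = 2, vid.v produces <0,1,2,3,...>, vmseq.vx
       // against 2 produces the mask <0,0,1,0,...>, and the merge takes the
       // splatted value only in lane 2, leaving all other lanes of the source
       // vector untouched.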
1500   SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val);
1501   SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx);
1502 
1503   SDValue VID = DAG.getNode(RISCVISD::VID, DL, VecVT);
1504   auto SetCCVT =
1505       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT);
1506   SDValue Mask = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ);
1507 
1508   return DAG.getNode(ISD::VSELECT, DL, VecVT, Mask, SplattedVal, Vec);
1509 }
1510 
1511 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
1512 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
1513 // types this is done using VMV_X_S to allow us to glean information about the
1514 // sign bits of the result.
1515 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1516                                                      SelectionDAG &DAG) const {
1517   SDLoc DL(Op);
1518   SDValue Idx = Op.getOperand(1);
1519   SDValue Vec = Op.getOperand(0);
1520   EVT EltVT = Op.getValueType();
1521   EVT VecVT = Vec.getValueType();
1522   MVT XLenVT = Subtarget.getXLenVT();
1523 
1524   // If the index is 0, the vector is already in the right position.
1525   if (!isNullConstant(Idx)) {
1526     Vec = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, DAG.getUNDEF(VecVT), Vec,
1527                       Idx);
1528   }
1529 
1530   if (!EltVT.isInteger()) {
1531     // Floating-point extracts are handled in TableGen.
1532     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
1533                        DAG.getConstant(0, DL, XLenVT));
1534   }
1535 
1536   SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
1537   return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
1538 }
1539 
1540 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1541                                                      SelectionDAG &DAG) const {
1542   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1543   SDLoc DL(Op);
1544 
1545   if (Subtarget.hasStdExtV()) {
1546     // Some RVV intrinsics may claim that they want an integer operand to be
1547     // extended.
1548     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1549             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1550       if (II->ExtendedOperand) {
1551         assert(II->ExtendedOperand < Op.getNumOperands());
1552         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1553         SDValue &ScalarOp = Operands[II->ExtendedOperand];
1554         EVT OpVT = ScalarOp.getValueType();
1555         if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
1556             (OpVT == MVT::i32 && Subtarget.is64Bit())) {
1557           // If the operand is a constant, sign extend to increase our chances
1558           // of being able to use a .vi instruction. ANY_EXTEND would become a
1559           // zero extend and the simm5 check in isel would fail.
1560           // FIXME: Should we ignore the upper bits in isel instead?
1561           unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
1562                                                           : ISD::ANY_EXTEND;
1563           ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
1564           return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
1565                              Operands);
1566         }
1567       }
1568     }
1569   }
1570 
1571   switch (IntNo) {
1572   default:
1573     return SDValue();    // Don't custom lower most intrinsics.
1574   case Intrinsic::thread_pointer: {
1575     EVT PtrVT = getPointerTy(DAG.getDataLayout());
1576     return DAG.getRegister(RISCV::X4, PtrVT);
1577   }
1578   case Intrinsic::riscv_vmv_x_s:
1579     assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
1580     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
1581                        Op.getOperand(1));
1582   }
1583 }
1584 
1585 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
1586                                                     SelectionDAG &DAG) const {
1587   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1588   SDLoc DL(Op);
1589 
1590   if (Subtarget.hasStdExtV()) {
1591     // Some RVV intrinsics may claim that they want an integer operand to be
1592     // extended.
1593     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1594             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1595       if (II->ExtendedOperand) {
1596         // The operands start from the second argument in INTRINSIC_W_CHAIN.
1597         unsigned ExtendOp = II->ExtendedOperand + 1;
1598         assert(ExtendOp < Op.getNumOperands());
1599         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1600         SDValue &ScalarOp = Operands[ExtendOp];
1601         EVT OpVT = ScalarOp.getValueType();
1602         if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
1603             (OpVT == MVT::i32 && Subtarget.is64Bit())) {
1604           // If the operand is a constant, sign extend to increase our chances
1605           // of being able to use a .vi instruction. ANY_EXTEND would become a
1606           // zero extend and the simm5 check in isel would fail.
1607           // FIXME: Should we ignore the upper bits in isel instead?
1608           unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
1609                                                           : ISD::ANY_EXTEND;
1610           ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
1611           return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
1612                              Operands);
1613         }
1614       }
1615     }
1616   }
1617 
1618   switch (IntNo) {
1619   default:
1620     return SDValue(); // Don't custom lower most intrinsics.
1621   case Intrinsic::riscv_vleff: {
1622     SDLoc DL(Op);
1623     SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
1624     SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0),
1625                                Op.getOperand(2), Op.getOperand(3));
1626     SDValue ReadVL =
1627         SDValue(DAG.getMachineNode(RISCV::PseudoReadVL, DL, Op->getValueType(1),
1628                                    Load.getValue(2)),
1629                 0);
1630     return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
1631   }
1632   case Intrinsic::riscv_vleff_mask: {
1633     SDLoc DL(Op);
1634     SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
1635     SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0),
1636                                Op.getOperand(2), Op.getOperand(3),
1637                                Op.getOperand(4), Op.getOperand(5));
1638     SDValue ReadVL =
1639         SDValue(DAG.getMachineNode(RISCV::PseudoReadVL, DL, Op->getValueType(1),
1640                                    Load.getValue(2)),
1641                 0);
1642     return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
1643   }
1644   }
1645 }
1646 
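     // Maps a (supported) standard ISD reduction opcode to the corresponding
     // RVV reduction opcode, together with the identity (neutral) element for
     // that reduction at the given element width: 0 for ADD/OR/XOR/UMAX,
     // all-ones for AND, the maximum unsigned value for UMIN, the minimum
     // signed value for SMAX, and the maximum signed value for SMIN. The
     // identity is used as the start-value splat in lowerVECREDUCE below so
     // that it does not perturb the result.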
1647 static std::pair<unsigned, uint64_t>
1648 getRVVReductionOpAndIdentityVal(unsigned ISDOpcode, unsigned EltSizeBits) {
1649   switch (ISDOpcode) {
1650   default:
1651     llvm_unreachable("Unhandled reduction");
1652   case ISD::VECREDUCE_ADD:
1653     return {RISCVISD::VECREDUCE_ADD, 0};
1654   case ISD::VECREDUCE_UMAX:
1655     return {RISCVISD::VECREDUCE_UMAX, 0};
1656   case ISD::VECREDUCE_SMAX:
1657     return {RISCVISD::VECREDUCE_SMAX, minIntN(EltSizeBits)};
1658   case ISD::VECREDUCE_UMIN:
1659     return {RISCVISD::VECREDUCE_UMIN, maxUIntN(EltSizeBits)};
1660   case ISD::VECREDUCE_SMIN:
1661     return {RISCVISD::VECREDUCE_SMIN, maxIntN(EltSizeBits)};
1662   case ISD::VECREDUCE_AND:
1663     return {RISCVISD::VECREDUCE_AND, -1};
1664   case ISD::VECREDUCE_OR:
1665     return {RISCVISD::VECREDUCE_OR, 0};
1666   case ISD::VECREDUCE_XOR:
1667     return {RISCVISD::VECREDUCE_XOR, 0};
1668   }
1669 }
1670 
1671 // Take a (supported) standard ISD reduction opcode and transform it to a RISCV
1672 // reduction opcode. Note that this returns a vector type, which must be
1673 // further processed to access the scalar result in element 0.
1674 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
1675                                             SelectionDAG &DAG) const {
1676   SDLoc DL(Op);
1677   assert(Op.getValueType().isSimple() &&
1678          Op.getOperand(0).getValueType().isSimple() &&
1679          "Unexpected vector-reduce lowering");
1680   MVT VecEltVT = Op.getOperand(0).getSimpleValueType().getVectorElementType();
1681   unsigned RVVOpcode;
1682   uint64_t IdentityVal;
1683   std::tie(RVVOpcode, IdentityVal) =
1684       getRVVReductionOpAndIdentityVal(Op.getOpcode(), VecEltVT.getSizeInBits());
1685   // We have to perform a bit of a dance to get from our vector type to the
1686   // correct LMUL=1 vector type. We divide our minimum VLEN (64) by the width
1687   // in bits of the vector element to find the type which fills a single
1688   // register. Be careful to use the operand's vector element type rather than
1689   // the reduction's value type, as the latter has likely been extended to XLEN.
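       // For example, with i16 elements NumElts is 64/16 = 4 and M1VT is
       // nxv4i16; with i32 elements it is nxv2i32 and with i64 elements
       // nxv1i64.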
1690   unsigned NumElts = 64 / VecEltVT.getSizeInBits();
1691   MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts);
1692   SDValue IdentitySplat =
1693       DAG.getSplatVector(M1VT, DL, DAG.getConstant(IdentityVal, DL, VecEltVT));
1694   SDValue Reduction =
1695       DAG.getNode(RVVOpcode, DL, M1VT, Op.getOperand(0), IdentitySplat);
1696   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
1697                              DAG.getConstant(0, DL, Subtarget.getXLenVT()));
1698   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
1699 }
1700 
1701 // Returns the opcode of the target-specific SDNode that implements the 32-bit
1702 // form of the given Opcode.
1703 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
1704   switch (Opcode) {
1705   default:
1706     llvm_unreachable("Unexpected opcode");
1707   case ISD::SHL:
1708     return RISCVISD::SLLW;
1709   case ISD::SRA:
1710     return RISCVISD::SRAW;
1711   case ISD::SRL:
1712     return RISCVISD::SRLW;
1713   case ISD::SDIV:
1714     return RISCVISD::DIVW;
1715   case ISD::UDIV:
1716     return RISCVISD::DIVUW;
1717   case ISD::UREM:
1718     return RISCVISD::REMUW;
1719   case ISD::ROTL:
1720     return RISCVISD::ROLW;
1721   case ISD::ROTR:
1722     return RISCVISD::RORW;
1723   case RISCVISD::GREVI:
1724     return RISCVISD::GREVIW;
1725   case RISCVISD::GORCI:
1726     return RISCVISD::GORCIW;
1727   }
1728 }
1729 
1730 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
1731 // Because i32 isn't a legal type for RV64, these operations would otherwise
1732 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
1733 // later on because the fact that the operation was originally of type i32 is
1734 // lost.
1735 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
1736                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
1737   SDLoc DL(N);
1738   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
1739   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1740   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1741   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1742   // ReplaceNodeResults requires we maintain the same type for the return value.
1743   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
1744 }
1745 
1746 // Converts the given 32-bit operation to an i64 operation with sign-extension
1747 // semantics, to reduce the number of sign-extension instructions needed.
1748 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
1749   SDLoc DL(N);
1750   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1751   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
1752   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
1753   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
1754                                DAG.getValueType(MVT::i32));
1755   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
1756 }
1757 
1758 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
1759                                              SmallVectorImpl<SDValue> &Results,
1760                                              SelectionDAG &DAG) const {
1761   SDLoc DL(N);
1762   switch (N->getOpcode()) {
1763   default:
1764     llvm_unreachable("Don't know how to custom type legalize this operation!");
1765   case ISD::STRICT_FP_TO_SINT:
1766   case ISD::STRICT_FP_TO_UINT:
1767   case ISD::FP_TO_SINT:
1768   case ISD::FP_TO_UINT: {
1769     bool IsStrict = N->isStrictFPOpcode();
1770     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1771            "Unexpected custom legalisation");
1772     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
1773     // If the FP type needs to be softened, emit a library call using the 'si'
1774     // version. If we left it to default legalization we'd end up with 'di'. If
1775     // the FP type doesn't need to be softened just let generic type
1776     // legalization promote the result type.
1777     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
1778         TargetLowering::TypeSoftenFloat)
1779       return;
1780     RTLIB::Libcall LC;
1781     if (N->getOpcode() == ISD::FP_TO_SINT ||
1782         N->getOpcode() == ISD::STRICT_FP_TO_SINT)
1783       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
1784     else
1785       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
1786     MakeLibCallOptions CallOptions;
1787     EVT OpVT = Op0.getValueType();
1788     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
1789     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
1790     SDValue Result;
1791     std::tie(Result, Chain) =
1792         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
1793     Results.push_back(Result);
1794     if (IsStrict)
1795       Results.push_back(Chain);
1796     break;
1797   }
1798   case ISD::READCYCLECOUNTER: {
1799     assert(!Subtarget.is64Bit() &&
1800            "READCYCLECOUNTER only has custom type legalization on riscv32");
1801 
1802     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
1803     SDValue RCW =
1804         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
1805 
1806     Results.push_back(
1807         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
1808     Results.push_back(RCW.getValue(2));
1809     break;
1810   }
1811   case ISD::ADD:
1812   case ISD::SUB:
1813   case ISD::MUL:
1814     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1815            "Unexpected custom legalisation");
1816     if (N->getOperand(1).getOpcode() == ISD::Constant)
1817       return;
1818     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
1819     break;
1820   case ISD::SHL:
1821   case ISD::SRA:
1822   case ISD::SRL:
1823     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1824            "Unexpected custom legalisation");
1825     if (N->getOperand(1).getOpcode() == ISD::Constant)
1826       return;
1827     Results.push_back(customLegalizeToWOp(N, DAG));
1828     break;
1829   case ISD::ROTL:
1830   case ISD::ROTR:
1831     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1832            "Unexpected custom legalisation");
1833     Results.push_back(customLegalizeToWOp(N, DAG));
1834     break;
1835   case ISD::SDIV:
1836   case ISD::UDIV:
1837   case ISD::UREM: {
1838     MVT VT = N->getSimpleValueType(0);
1839     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
1840            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
1841            "Unexpected custom legalisation");
1842     if (N->getOperand(0).getOpcode() == ISD::Constant ||
1843         N->getOperand(1).getOpcode() == ISD::Constant)
1844       return;
1845 
1846     // If the input is i32, use ANY_EXTEND since the W instructions don't read
1847     // the upper 32 bits. For other types we need to sign or zero extend
1848     // based on the opcode.
1849     unsigned ExtOpc = ISD::ANY_EXTEND;
1850     if (VT != MVT::i32)
1851       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
1852                                            : ISD::ZERO_EXTEND;
1853 
1854     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
1855     break;
1856   }
1857   case ISD::BITCAST: {
1858     assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1859              Subtarget.hasStdExtF()) ||
1860             (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) &&
1861            "Unexpected custom legalisation");
1862     SDValue Op0 = N->getOperand(0);
1863     if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) {
1864       if (Op0.getValueType() != MVT::f16)
1865         return;
1866       SDValue FPConv =
1867           DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0);
1868       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
1869     } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1870                Subtarget.hasStdExtF()) {
1871       if (Op0.getValueType() != MVT::f32)
1872         return;
1873       SDValue FPConv =
1874           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
1875       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
1876     }
1877     break;
1878   }
1879   case RISCVISD::GREVI:
1880   case RISCVISD::GORCI: {
1881     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1882            "Unexpected custom legalisation");
1883     // This is similar to customLegalizeToWOp, except that we pass the second
1884     // operand (a TargetConstant) straight through: it is already of type
1885     // XLenVT.
1886     SDLoc DL(N);
1887     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
1888     SDValue NewOp0 =
1889         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1890     SDValue NewRes =
1891         DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
1892     // ReplaceNodeResults requires we maintain the same type for the return
1893     // value.
1894     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
1895     break;
1896   }
1897   case ISD::BSWAP:
1898   case ISD::BITREVERSE: {
1899     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1900            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
1901     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
1902                                  N->getOperand(0));
1903     unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
1904     SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
1905                                  DAG.getTargetConstant(Imm, DL,
1906                                                        Subtarget.getXLenVT()));
1907     // ReplaceNodeResults requires we maintain the same type for the return
1908     // value.
1909     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
1910     break;
1911   }
1912   case ISD::FSHL:
1913   case ISD::FSHR: {
1914     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1915            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
1916     SDValue NewOp0 =
1917         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1918     SDValue NewOp1 =
1919         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
1920     SDValue NewOp2 =
1921         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
1922     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
1923     // Mask the shift amount to 5 bits.
1924     NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
1925                          DAG.getConstant(0x1f, DL, MVT::i64));
1926     unsigned Opc =
1927         N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
1928     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
1929     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
1930     break;
1931   }
1932   case ISD::EXTRACT_VECTOR_ELT: {
1933     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
1934     // type is illegal (currently only vXi64 RV32).
1935     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
1936     // transferred to the destination register. We issue two of these from the
1937     // upper and lower halves of the SEW-bit vector element, slid down to the
1938     // first element.
1939     SDLoc DL(N);
1940     SDValue Vec = N->getOperand(0);
1941     SDValue Idx = N->getOperand(1);
1942     EVT VecVT = Vec.getValueType();
1943     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
1944            VecVT.getVectorElementType() == MVT::i64 &&
1945            "Unexpected EXTRACT_VECTOR_ELT legalization");
1946 
1947     SDValue Slidedown = Vec;
1948     // Unless the index is known to be 0, we must slide the vector down to get
1949     // the desired element into index 0.
1950     if (!isNullConstant(Idx))
1951       Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
1952                               DAG.getUNDEF(VecVT), Vec, Idx);
1953 
1954     MVT XLenVT = Subtarget.getXLenVT();
1955     // Extract the lower XLEN bits of the correct vector element.
1956     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx);
1957 
1958     // To extract the upper XLEN bits of the vector element, shift the first
1959     // element right by 32 bits and re-extract the lower XLEN bits.
1960     SDValue ThirtyTwoV =
1961         DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
1962                     DAG.getConstant(32, DL, Subtarget.getXLenVT()));
1963     SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV);
1964 
1965     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx);
1966 
1967     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
1968     break;
1969   }
1970   case ISD::INTRINSIC_WO_CHAIN: {
1971     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
1972     switch (IntNo) {
1973     default:
1974       llvm_unreachable(
1975           "Don't know how to custom type legalize this intrinsic!");
1976     case Intrinsic::riscv_vmv_x_s: {
1977       EVT VT = N->getValueType(0);
1978       assert((VT == MVT::i8 || VT == MVT::i16 ||
1979               (Subtarget.is64Bit() && VT == MVT::i32)) &&
1980              "Unexpected custom legalisation!");
1981       SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
1982                                     Subtarget.getXLenVT(), N->getOperand(1));
1983       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
1984       break;
1985     }
1986     }
1987     break;
1988   }
1989   case ISD::VECREDUCE_ADD:
1990   case ISD::VECREDUCE_AND:
1991   case ISD::VECREDUCE_OR:
1992   case ISD::VECREDUCE_XOR:
1993   case ISD::VECREDUCE_SMAX:
1994   case ISD::VECREDUCE_UMAX:
1995   case ISD::VECREDUCE_SMIN:
1996   case ISD::VECREDUCE_UMIN:
1997     // The custom-lowering for these nodes returns a vector whose first element
1998     // is the result of the reduction. Extract its first element and let the
1999     // legalization for EXTRACT_VECTOR_ELT do the rest of the job.
2000     Results.push_back(lowerVECREDUCE(SDValue(N, 0), DAG));
2001     break;
2002   }
2003 }
2004 
2005 // A structure to hold one of the bit-manipulation patterns below. Together, a
2006 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
2007 //   (or (and (shl x, 1), 0xAAAAAAAA),
2008 //       (and (srl x, 1), 0x55555555))
2009 struct RISCVBitmanipPat {
2010   SDValue Op;
2011   unsigned ShAmt;
2012   bool IsSHL;
2013 
2014   bool formsPairWith(const RISCVBitmanipPat &Other) const {
2015     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
2016   }
2017 };
2018 
2019 // Matches any of the following bit-manipulation patterns:
2020 //   (and (shl x, 1), (0x55555555 << 1))
2021 //   (and (srl x, 1), 0x55555555)
2022 //   (shl (and x, 0x55555555), 1)
2023 //   (srl (and x, (0x55555555 << 1)), 1)
2024 // where the shift amount and mask may vary thus:
2025 //   [1]  = 0x55555555 / 0xAAAAAAAA
2026 //   [2]  = 0x33333333 / 0xCCCCCCCC
2027 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
2028 //   [8]  = 0x00FF00FF / 0xFF00FF00
2029 //   [16] = 0x0000FFFF / 0xFFFF0000
2030 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
2031 static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
2032   Optional<uint64_t> Mask;
2033   // Optionally consume a mask around the shift operation.
2034   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
2035     Mask = Op.getConstantOperandVal(1);
2036     Op = Op.getOperand(0);
2037   }
2038   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
2039     return None;
2040   bool IsSHL = Op.getOpcode() == ISD::SHL;
2041 
2042   if (!isa<ConstantSDNode>(Op.getOperand(1)))
2043     return None;
2044   auto ShAmt = Op.getConstantOperandVal(1);
2045 
2046   if (!isPowerOf2_64(ShAmt))
2047     return None;
2048 
2049   // These are the unshifted masks which we use to match bit-manipulation
2050   // patterns. They may be shifted left in certain circumstances.
2051   static const uint64_t BitmanipMasks[] = {
2052       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
2053       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
2054   };
2055 
2056   unsigned MaskIdx = Log2_64(ShAmt);
2057   if (MaskIdx >= array_lengthof(BitmanipMasks))
2058     return None;
2059 
2060   auto Src = Op.getOperand(0);
2061 
2062   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
2063   auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
2064 
2065   // The expected mask is shifted left when the AND is found around SHL
2066   // patterns.
2067   //   ((x >> 1) & 0x55555555)
2068   //   ((x << 1) & 0xAAAAAAAA)
2069   bool SHLExpMask = IsSHL;
2070 
2071   if (!Mask) {
2072     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
2073     // the mask is all ones: consume that now.
2074     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
2075       Mask = Src.getConstantOperandVal(1);
2076       Src = Src.getOperand(0);
2077       // The expected mask is now in fact shifted left for SRL, so reverse the
2078       // decision.
2079       //   ((x & 0xAAAAAAAA) >> 1)
2080       //   ((x & 0x55555555) << 1)
2081       SHLExpMask = !SHLExpMask;
2082     } else {
2083       // Use a default shifted mask of all-ones if there's no AND, truncated
2084       // down to the expected width. This simplifies the logic later on.
2085       Mask = maskTrailingOnes<uint64_t>(Width);
2086       *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
2087     }
2088   }
2089 
2090   if (SHLExpMask)
2091     ExpMask <<= ShAmt;
2092 
2093   if (Mask != ExpMask)
2094     return None;
2095 
2096   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
2097 }
2098 
2099 // Match the following pattern as a GREVI(W) operation
2100 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
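     // For example, on i32,
     //   (or (and (shl x, 4), 0xF0F0F0F0), (and (srl x, 4), 0x0F0F0F0F))
     // swaps the two nibbles within each byte and is matched here as
     // (GREVI x, 4).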
2101 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
2102                                const RISCVSubtarget &Subtarget) {
2103   EVT VT = Op.getValueType();
2104 
2105   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
2106     auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
2107     auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
2108     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
2109       SDLoc DL(Op);
2110       return DAG.getNode(
2111           RISCVISD::GREVI, DL, VT, LHS->Op,
2112           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
2113     }
2114   }
2115   return SDValue();
2116 }
2117 
2118 // Matches any of the following patterns as a GORCI(W) operation
2119 // 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
2120 // 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
2121 // 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
2122 // Note that with the variant of 3.,
2123 //     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
2124 // the inner pattern will first be matched as GREVI and then the outer
2125 // pattern will be matched to GORC via the first rule above.
2126 // 4.  (or (rotl/rotr x, bitwidth/2), x)
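     // For example, on RV32, (or (rotl x, 16), x) is matched via rule 4 above
     // as (GORCI x, 16).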
2127 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
2128                                const RISCVSubtarget &Subtarget) {
2129   EVT VT = Op.getValueType();
2130 
2131   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
2132     SDLoc DL(Op);
2133     SDValue Op0 = Op.getOperand(0);
2134     SDValue Op1 = Op.getOperand(1);
2135 
2136     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
2137       if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
2138           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
2139         return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
2140       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
2141       if ((Reverse.getOpcode() == ISD::ROTL ||
2142            Reverse.getOpcode() == ISD::ROTR) &&
2143           Reverse.getOperand(0) == X &&
2144           isa<ConstantSDNode>(Reverse.getOperand(1))) {
2145         uint64_t RotAmt = Reverse.getConstantOperandVal(1);
2146         if (RotAmt == (VT.getSizeInBits() / 2))
2147           return DAG.getNode(
2148               RISCVISD::GORCI, DL, VT, X,
2149               DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
2150       }
2151       return SDValue();
2152     };
2153 
2154     // Check for either commutable permutation of (or (GREVI x, shamt), x)
2155     if (SDValue V = MatchOROfReverse(Op0, Op1))
2156       return V;
2157     if (SDValue V = MatchOROfReverse(Op1, Op0))
2158       return V;
2159 
2160     // OR is commutable so canonicalize its OR operand to the left
2161     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
2162       std::swap(Op0, Op1);
2163     if (Op0.getOpcode() != ISD::OR)
2164       return SDValue();
2165     SDValue OrOp0 = Op0.getOperand(0);
2166     SDValue OrOp1 = Op0.getOperand(1);
2167     auto LHS = matchRISCVBitmanipPat(OrOp0);
2168     // OR is commutable so swap the operands and try again: x might have been
2169     // on the left
2170     if (!LHS) {
2171       std::swap(OrOp0, OrOp1);
2172       LHS = matchRISCVBitmanipPat(OrOp0);
2173     }
2174     auto RHS = matchRISCVBitmanipPat(Op1);
2175     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
2176       return DAG.getNode(
2177           RISCVISD::GORCI, DL, VT, LHS->Op,
2178           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
2179     }
2180   }
2181   return SDValue();
2182 }
2183 
2184 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
2185 // non-zero, and to x when it is zero. Any repeated GREVI stage undoes itself.
2186 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). A repeated GORCI
2187 // stage does not undo itself, but it is redundant.
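     // For example, (GREVI (GREVI x, 1), 2) becomes (GREVI x, 3),
     // (GREVI (GREVI x, 3), 3) becomes x, and (GORCI (GORCI x, 1), 2) becomes
     // (GORCI x, 3).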
2188 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
2189   unsigned ShAmt1 = N->getConstantOperandVal(1);
2190   SDValue Src = N->getOperand(0);
2191 
2192   if (Src.getOpcode() != N->getOpcode())
2193     return SDValue();
2194 
2195   unsigned ShAmt2 = Src.getConstantOperandVal(1);
2196   Src = Src.getOperand(0);
2197 
2198   unsigned CombinedShAmt;
2199   if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW)
2200     CombinedShAmt = ShAmt1 | ShAmt2;
2201   else
2202     CombinedShAmt = ShAmt1 ^ ShAmt2;
2203 
2204   if (CombinedShAmt == 0)
2205     return Src;
2206 
2207   SDLoc DL(N);
2208   return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src,
2209                      DAG.getTargetConstant(CombinedShAmt, DL,
2210                                            N->getOperand(1).getValueType()));
2211 }
2212 
2213 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
2214                                                DAGCombinerInfo &DCI) const {
2215   SelectionDAG &DAG = DCI.DAG;
2216 
2217   switch (N->getOpcode()) {
2218   default:
2219     break;
2220   case RISCVISD::SplitF64: {
2221     SDValue Op0 = N->getOperand(0);
2222     // If the input to SplitF64 is just BuildPairF64 then the operation is
2223     // redundant. Instead, use BuildPairF64's operands directly.
2224     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
2225       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
2226 
2227     SDLoc DL(N);
2228 
2229     // It's cheaper to materialise two 32-bit integers than to load a double
2230     // from the constant pool and transfer it to integer registers through the
2231     // stack.
2232     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
2233       APInt V = C->getValueAPF().bitcastToAPInt();
2234       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
2235       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
2236       return DCI.CombineTo(N, Lo, Hi);
2237     }
2238 
2239     // This is a target-specific version of a DAGCombine performed in
2240     // DAGCombiner::visitBITCAST. It performs the equivalent of:
2241     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
2242     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
2243     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
2244         !Op0.getNode()->hasOneUse())
2245       break;
2246     SDValue NewSplitF64 =
2247         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
2248                     Op0.getOperand(0));
2249     SDValue Lo = NewSplitF64.getValue(0);
2250     SDValue Hi = NewSplitF64.getValue(1);
2251     APInt SignBit = APInt::getSignMask(32);
2252     if (Op0.getOpcode() == ISD::FNEG) {
2253       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
2254                                   DAG.getConstant(SignBit, DL, MVT::i32));
2255       return DCI.CombineTo(N, Lo, NewHi);
2256     }
2257     assert(Op0.getOpcode() == ISD::FABS);
2258     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
2259                                 DAG.getConstant(~SignBit, DL, MVT::i32));
2260     return DCI.CombineTo(N, Lo, NewHi);
2261   }
2262   case RISCVISD::SLLW:
2263   case RISCVISD::SRAW:
2264   case RISCVISD::SRLW:
2265   case RISCVISD::ROLW:
2266   case RISCVISD::RORW: {
2267     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
2268     SDValue LHS = N->getOperand(0);
2269     SDValue RHS = N->getOperand(1);
2270     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
2271     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
2272     if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
2273         SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
2274       if (N->getOpcode() != ISD::DELETED_NODE)
2275         DCI.AddToWorklist(N);
2276       return SDValue(N, 0);
2277     }
2278     break;
2279   }
2280   case RISCVISD::FSL:
2281   case RISCVISD::FSR: {
2282     // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
2283     SDValue ShAmt = N->getOperand(2);
2284     unsigned BitWidth = ShAmt.getValueSizeInBits();
2285     assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
2286     APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
2287     if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
2288       if (N->getOpcode() != ISD::DELETED_NODE)
2289         DCI.AddToWorklist(N);
2290       return SDValue(N, 0);
2291     }
2292     break;
2293   }
2294   case RISCVISD::FSLW:
2295   case RISCVISD::FSRW: {
2296     // Only the lower 32 bits of the values and the lower 6 bits of the shift
2297     // amount are read.
2298     SDValue Op0 = N->getOperand(0);
2299     SDValue Op1 = N->getOperand(1);
2300     SDValue ShAmt = N->getOperand(2);
2301     APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
2302     APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
2303     if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
2304         SimplifyDemandedBits(Op1, OpMask, DCI) ||
2305         SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
2306       if (N->getOpcode() != ISD::DELETED_NODE)
2307         DCI.AddToWorklist(N);
2308       return SDValue(N, 0);
2309     }
2310     break;
2311   }
2312   case RISCVISD::GREVIW:
2313   case RISCVISD::GORCIW: {
2314     // Only the lower 32 bits of the first operand are read
2315     SDValue Op0 = N->getOperand(0);
2316     APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
2317     if (SimplifyDemandedBits(Op0, Mask, DCI)) {
2318       if (N->getOpcode() != ISD::DELETED_NODE)
2319         DCI.AddToWorklist(N);
2320       return SDValue(N, 0);
2321     }
2322 
2323     return combineGREVI_GORCI(N, DCI.DAG);
2324   }
2325   case RISCVISD::FMV_X_ANYEXTW_RV64: {
2326     SDLoc DL(N);
2327     SDValue Op0 = N->getOperand(0);
2328     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
2329     // conversion is unnecessary and can be replaced with an ANY_EXTEND
2330     // of the FMV_W_X_RV64 operand.
2331     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
2332       assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
2333              "Unexpected value type!");
2334       return Op0.getOperand(0);
2335     }
2336 
2337     // This is a target-specific version of a DAGCombine performed in
2338     // DAGCombiner::visitBITCAST. It performs the equivalent of:
2339     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
2340     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
2341     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
2342         !Op0.getNode()->hasOneUse())
2343       break;
2344     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
2345                                  Op0.getOperand(0));
2346     APInt SignBit = APInt::getSignMask(32).sext(64);
2347     if (Op0.getOpcode() == ISD::FNEG)
2348       return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
2349                          DAG.getConstant(SignBit, DL, MVT::i64));
2350 
2351     assert(Op0.getOpcode() == ISD::FABS);
2352     return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
2353                        DAG.getConstant(~SignBit, DL, MVT::i64));
2354   }
2355   case RISCVISD::GREVI:
2356   case RISCVISD::GORCI:
2357     return combineGREVI_GORCI(N, DCI.DAG);
2358   case ISD::OR:
2359     if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
2360       return GREV;
2361     if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
2362       return GORC;
2363     break;
2364   case RISCVISD::SELECT_CC: {
2365     // Transform
2366     // (select_cc (xor X, 1), 0, setne, trueV, falseV) ->
2367     // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
2368     // This can occur when legalizing some floating point comparisons.
2369     SDValue LHS = N->getOperand(0);
2370     SDValue RHS = N->getOperand(1);
2371     auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
2372     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
2373     if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) &&
2374         LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) &&
2375         DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) {
2376       SDLoc DL(N);
2377       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
2378       SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
2379       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
2380                          {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3),
2381                           N->getOperand(4)});
2382     }
2383     break;
2384   }
2385   case ISD::SETCC: {
2386     // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1.
2387     // Comparing with 0 may allow us to fold into bnez/beqz.
2388     SDValue LHS = N->getOperand(0);
2389     SDValue RHS = N->getOperand(1);
2390     if (LHS.getValueType().isScalableVector())
2391       break;
2392     auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2393     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
2394     if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) &&
2395         DAG.MaskedValueIsZero(LHS, Mask)) {
2396       SDLoc DL(N);
2397       SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType());
2398       CC = ISD::getSetCCInverse(CC, LHS.getValueType());
2399       return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC);
2400     }
2401     break;
2402   }
2403   }
2404 
2405   return SDValue();
2406 }
2407 
2408 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
2409     const SDNode *N, CombineLevel Level) const {
2410   // The following folds are only desirable if `(OP _, c1 << c2)` can be
2411   // materialised in fewer instructions than `(OP _, c1)`:
2412   //
2413   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2414   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
2415   SDValue N0 = N->getOperand(0);
2416   EVT Ty = N0.getValueType();
2417   if (Ty.isScalarInteger() &&
2418       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
2419     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2420     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
2421     if (C1 && C2) {
2422       const APInt &C1Int = C1->getAPIntValue();
2423       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
2424 
2425       // We can materialise `c1 << c2` into an add immediate, so it's "free",
2426       // and the combine should happen, to potentially allow further combines
2427       // later.
2428       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
2429           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
2430         return true;
2431 
2432       // We can materialise `c1` in an add immediate, so it's "free", and the
2433       // combine should be prevented.
2434       if (C1Int.getMinSignedBits() <= 64 &&
2435           isLegalAddImmediate(C1Int.getSExtValue()))
2436         return false;
2437 
2438       // Neither constant will fit into an immediate, so find materialisation
2439       // costs.
2440       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
2441                                               Subtarget.is64Bit());
2442       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
2443           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
2444 
2445       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
2446       // combine should be prevented.
2447       if (C1Cost < ShiftedC1Cost)
2448         return false;
2449     }
2450   }
2451   return true;
2452 }
2453 
2454 bool RISCVTargetLowering::targetShrinkDemandedConstant(
2455     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2456     TargetLoweringOpt &TLO) const {
2457   // Delay this optimization as late as possible.
2458   if (!TLO.LegalOps)
2459     return false;
2460 
2461   EVT VT = Op.getValueType();
2462   if (VT.isVector())
2463     return false;
2464 
2465   // Only handle AND for now.
2466   if (Op.getOpcode() != ISD::AND)
2467     return false;
2468 
2469   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2470   if (!C)
2471     return false;
2472 
2473   const APInt &Mask = C->getAPIntValue();
2474 
2475   // Clear all non-demanded bits initially.
2476   APInt ShrunkMask = Mask & DemandedBits;
2477 
2478   // If the shrunk mask fits in sign extended 12 bits, let the target
2479   // independent code apply it.
2480   if (ShrunkMask.isSignedIntN(12))
2481     return false;
2482 
2483   // Try to make a smaller immediate by setting undemanded bits.
2484 
2485   // We need to be able to make a negative number through a combination of mask
2486   // and undemanded bits.
2487   APInt ExpandedMask = Mask | ~DemandedBits;
2488   if (!ExpandedMask.isNegative())
2489     return false;
2490 
2491   // Compute the minimum number of bits needed to represent the negative number.
2492   unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
2493 
2494   // Try to make a 12 bit negative immediate. If that fails try to make a 32
2495   // bit negative immediate unless the shrunk immediate already fits in 32 bits.
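       // For example (illustrative values, assuming an i64 AND on RV64): with
       // Mask = 0xFFFFFE00 and DemandedBits = 0xFFFF, ShrunkMask = 0xFE00 is
       // not a simm12, but ExpandedMask = 0xFFFFFFFFFFFFFE00 needs only 10
       // signed bits, so the constant can be rewritten as -512 and folded
       // into an ANDI.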
2496   APInt NewMask = ShrunkMask;
2497   if (MinSignedBits <= 12)
2498     NewMask.setBitsFrom(11);
2499   else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
2500     NewMask.setBitsFrom(31);
2501   else
2502     return false;
2503 
2504   // Sanity check that the new mask is a subset of the expanded mask.
2505   assert(NewMask.isSubsetOf(ExpandedMask));
2506 
2507   // If we aren't changing the mask, just return true to keep it and prevent
2508   // the caller from optimizing.
2509   if (NewMask == Mask)
2510     return true;
2511 
2512   // Replace the constant with the new mask.
2513   SDLoc DL(Op);
2514   SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
2515   SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
2516   return TLO.CombineTo(Op, NewOp);
2517 }
2518 
2519 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2520                                                         KnownBits &Known,
2521                                                         const APInt &DemandedElts,
2522                                                         const SelectionDAG &DAG,
2523                                                         unsigned Depth) const {
2524   unsigned BitWidth = Known.getBitWidth();
2525   unsigned Opc = Op.getOpcode();
2526   assert((Opc >= ISD::BUILTIN_OP_END ||
2527           Opc == ISD::INTRINSIC_WO_CHAIN ||
2528           Opc == ISD::INTRINSIC_W_CHAIN ||
2529           Opc == ISD::INTRINSIC_VOID) &&
2530          "Should use MaskedValueIsZero if you don't know whether Op"
2531          " is a target node!");
2532 
2533   Known.resetAll();
2534   switch (Opc) {
2535   default: break;
2536   case RISCVISD::REMUW: {
2537     KnownBits Known2;
2538     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
2539     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
2540     // We only care about the lower 32 bits.
2541     Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
2542     // Restore the original width by sign extending.
2543     Known = Known.sext(BitWidth);
2544     break;
2545   }
2546   case RISCVISD::DIVUW: {
2547     KnownBits Known2;
2548     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
2549     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
2550     // We only care about the lower 32 bits.
2551     Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
2552     // Restore the original width by sign extending.
2553     Known = Known.sext(BitWidth);
2554     break;
2555   }
2556   case RISCVISD::READ_VLENB:
2557     // We assume VLENB is at least 8 bytes.
2558     // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
2559     Known.Zero.setLowBits(3);
2560     break;
2561   }
2562 }
2563 
2564 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
2565     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
2566     unsigned Depth) const {
2567   switch (Op.getOpcode()) {
2568   default:
2569     break;
2570   case RISCVISD::SLLW:
2571   case RISCVISD::SRAW:
2572   case RISCVISD::SRLW:
2573   case RISCVISD::DIVW:
2574   case RISCVISD::DIVUW:
2575   case RISCVISD::REMUW:
2576   case RISCVISD::ROLW:
2577   case RISCVISD::RORW:
2578   case RISCVISD::GREVIW:
2579   case RISCVISD::GORCIW:
2580   case RISCVISD::FSLW:
2581   case RISCVISD::FSRW:
2582     // TODO: As the result is sign-extended, this is conservatively correct. A
2583     // more precise answer could be calculated for SRAW depending on known
2584     // bits in the shift amount.
2585     return 33;
2586   case RISCVISD::VMV_X_S:
2587     // The number of sign bits of the scalar result is computed by obtaining the
2588     // element type of the input vector operand, subtracting its width from the
2589     // XLEN, and then adding one (sign bit within the element type). If the
2590     // element type is wider than XLen, the least-significant XLEN bits are
2591     // taken.
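         // For example, reading element 0 of a vector of i8 elements on RV64
         // gives 64 - 8 + 1 = 57 sign bits.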
2592     if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
2593       return 1;
2594     return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
2595   }
2596 
2597   return 1;
2598 }
2599 
2600 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
2601                                                   MachineBasicBlock *BB) {
2602   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
2603 
2604   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
2605   // Should the count have wrapped while it was being read, we need to try
2606   // again.
2607   // ...
2608   // read:
2609   // rdcycleh x3 # load high word of cycle
2610   // rdcycle  x2 # load low word of cycle
2611   // rdcycleh x4 # load high word of cycle
2612   // bne x3, x4, read # check if high word reads match, otherwise try again
2613   // ...
2614 
2615   MachineFunction &MF = *BB->getParent();
2616   const BasicBlock *LLVM_BB = BB->getBasicBlock();
2617   MachineFunction::iterator It = ++BB->getIterator();
2618 
2619   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
2620   MF.insert(It, LoopMBB);
2621 
2622   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
2623   MF.insert(It, DoneMBB);
2624 
2625   // Transfer the remainder of BB and its successor edges to DoneMBB.
2626   DoneMBB->splice(DoneMBB->begin(), BB,
2627                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
2628   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
2629 
2630   BB->addSuccessor(LoopMBB);
2631 
2632   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2633   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
2634   Register LoReg = MI.getOperand(0).getReg();
2635   Register HiReg = MI.getOperand(1).getReg();
2636   DebugLoc DL = MI.getDebugLoc();
2637 
2638   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
2639   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
2640       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
2641       .addReg(RISCV::X0);
2642   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
2643       .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
2644       .addReg(RISCV::X0);
2645   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
2646       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
2647       .addReg(RISCV::X0);
2648 
2649   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
2650       .addReg(HiReg)
2651       .addReg(ReadAgainReg)
2652       .addMBB(LoopMBB);
2653 
2654   LoopMBB->addSuccessor(LoopMBB);
2655   LoopMBB->addSuccessor(DoneMBB);
2656 
2657   MI.eraseFromParent();
2658 
2659   return DoneMBB;
2660 }
2661 
2662 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
2663                                              MachineBasicBlock *BB) {
2664   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
2665 
2666   MachineFunction &MF = *BB->getParent();
2667   DebugLoc DL = MI.getDebugLoc();
2668   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2669   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
2670   Register LoReg = MI.getOperand(0).getReg();
2671   Register HiReg = MI.getOperand(1).getReg();
2672   Register SrcReg = MI.getOperand(2).getReg();
2673   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
2674   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
2675 
2676   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
2677                           RI);
2678   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
2679   MachineMemOperand *MMOLo =
2680       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
2681   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
2682       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
2683   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
2684       .addFrameIndex(FI)
2685       .addImm(0)
2686       .addMemOperand(MMOLo);
2687   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
2688       .addFrameIndex(FI)
2689       .addImm(4)
2690       .addMemOperand(MMOHi);
2691   MI.eraseFromParent(); // The pseudo instruction is gone now.
2692   return BB;
2693 }
2694 
2695 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
2696                                                  MachineBasicBlock *BB) {
2697   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
2698          "Unexpected instruction");
2699 
2700   MachineFunction &MF = *BB->getParent();
2701   DebugLoc DL = MI.getDebugLoc();
2702   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2703   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
2704   Register DstReg = MI.getOperand(0).getReg();
2705   Register LoReg = MI.getOperand(1).getReg();
2706   Register HiReg = MI.getOperand(2).getReg();
2707   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
2708   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
2709 
2710   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
2711   MachineMemOperand *MMOLo =
2712       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
2713   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
2714       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
2715   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
2716       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
2717       .addFrameIndex(FI)
2718       .addImm(0)
2719       .addMemOperand(MMOLo);
2720   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
2721       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
2722       .addFrameIndex(FI)
2723       .addImm(4)
2724       .addMemOperand(MMOHi);
2725   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
2726   MI.eraseFromParent(); // The pseudo instruction is gone now.
2727   return BB;
2728 }
2729 
2730 static bool isSelectPseudo(MachineInstr &MI) {
2731   switch (MI.getOpcode()) {
2732   default:
2733     return false;
2734   case RISCV::Select_GPR_Using_CC_GPR:
2735   case RISCV::Select_FPR16_Using_CC_GPR:
2736   case RISCV::Select_FPR32_Using_CC_GPR:
2737   case RISCV::Select_FPR64_Using_CC_GPR:
2738     return true;
2739   }
2740 }
2741 
2742 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
2743                                            MachineBasicBlock *BB) {
2744   // To "insert" Select_* instructions, we actually have to insert the triangle
2745   // control-flow pattern.  The incoming instructions know the destination vreg
2746   // to set, the condition code register to branch on, the true/false values to
2747   // select between, and the condcode to use to select the appropriate branch.
2748   //
2749   // We produce the following control flow:
2750   //     HeadMBB
2751   //     |  \
2752   //     |  IfFalseMBB
2753   //     | /
2754   //    TailMBB
2755   //
2756   // When we find a sequence of selects we attempt to optimize their emission
2757   // by sharing the control flow. Currently we only handle cases where we have
2758   // multiple selects with the exact same condition (same LHS, RHS and CC).
2759   // The selects may be interleaved with other instructions if the other
2760   // instructions meet some requirements we deem safe:
2761   // - They are debug instructions. Otherwise,
2762   // - They do not have side-effects, do not access memory and their inputs do
2763   //   not depend on the results of the select pseudo-instructions.
2764   // The TrueV/FalseV operands of the selects cannot depend on the result of
2765   // previous selects in the sequence.
2766   // These conditions could be further relaxed. See the X86 target for a
2767   // related approach and more information.
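       // As a rough illustration (virtual register names invented here), a
       // single Select_GPR_Using_CC_GPR with CC == SETLT lowers to:
       //   HeadMBB:
       //     blt %lhs, %rhs, TailMBB
       //   IfFalseMBB:   ; fallthrough
       //   TailMBB:
       //     %res = PHI [ %trueval, HeadMBB ], [ %falseval, IfFalseMBB ]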
2768   Register LHS = MI.getOperand(1).getReg();
2769   Register RHS = MI.getOperand(2).getReg();
2770   auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
2771 
2772   SmallVector<MachineInstr *, 4> SelectDebugValues;
2773   SmallSet<Register, 4> SelectDests;
2774   SelectDests.insert(MI.getOperand(0).getReg());
2775 
2776   MachineInstr *LastSelectPseudo = &MI;
2777 
2778   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
2779        SequenceMBBI != E; ++SequenceMBBI) {
2780     if (SequenceMBBI->isDebugInstr())
2781       continue;
2782     else if (isSelectPseudo(*SequenceMBBI)) {
2783       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
2784           SequenceMBBI->getOperand(2).getReg() != RHS ||
2785           SequenceMBBI->getOperand(3).getImm() != CC ||
2786           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
2787           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
2788         break;
2789       LastSelectPseudo = &*SequenceMBBI;
2790       SequenceMBBI->collectDebugValues(SelectDebugValues);
2791       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
2792     } else {
2793       if (SequenceMBBI->hasUnmodeledSideEffects() ||
2794           SequenceMBBI->mayLoadOrStore())
2795         break;
2796       if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
2797             return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
2798           }))
2799         break;
2800     }
2801   }
2802 
2803   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
2804   const BasicBlock *LLVM_BB = BB->getBasicBlock();
2805   DebugLoc DL = MI.getDebugLoc();
2806   MachineFunction::iterator I = ++BB->getIterator();
2807 
2808   MachineBasicBlock *HeadMBB = BB;
2809   MachineFunction *F = BB->getParent();
2810   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
2811   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
2812 
2813   F->insert(I, IfFalseMBB);
2814   F->insert(I, TailMBB);
2815 
2816   // Transfer debug instructions associated with the selects to TailMBB.
2817   for (MachineInstr *DebugInstr : SelectDebugValues) {
2818     TailMBB->push_back(DebugInstr->removeFromParent());
2819   }
2820 
2821   // Move all instructions after the sequence to TailMBB.
2822   TailMBB->splice(TailMBB->end(), HeadMBB,
2823                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
2824   // Update machine-CFG edges by transferring all successors of the current
2825   // block to the new block which will contain the Phi nodes for the selects.
2826   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
2827   // Set the successors for HeadMBB.
2828   HeadMBB->addSuccessor(IfFalseMBB);
2829   HeadMBB->addSuccessor(TailMBB);
2830 
2831   // Insert appropriate branch.
2832   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
2833 
2834   BuildMI(HeadMBB, DL, TII.get(Opcode))
2835     .addReg(LHS)
2836     .addReg(RHS)
2837     .addMBB(TailMBB);
2838 
2839   // IfFalseMBB just falls through to TailMBB.
2840   IfFalseMBB->addSuccessor(TailMBB);
2841 
2842   // Create PHIs for all of the select pseudo-instructions.
2843   auto SelectMBBI = MI.getIterator();
2844   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
2845   auto InsertionPoint = TailMBB->begin();
2846   while (SelectMBBI != SelectEnd) {
2847     auto Next = std::next(SelectMBBI);
2848     if (isSelectPseudo(*SelectMBBI)) {
2849       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
2850       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
2851               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
2852           .addReg(SelectMBBI->getOperand(4).getReg())
2853           .addMBB(HeadMBB)
2854           .addReg(SelectMBBI->getOperand(5).getReg())
2855           .addMBB(IfFalseMBB);
2856       SelectMBBI->eraseFromParent();
2857     }
2858     SelectMBBI = Next;
2859   }
2860 
2861   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
2862   return TailMBB;
2863 }
2864 
2865 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
2866                                     int VLIndex, unsigned SEWIndex,
2867                                     RISCVVLMUL VLMul, bool WritesElement0) {
2868   MachineFunction &MF = *BB->getParent();
2869   DebugLoc DL = MI.getDebugLoc();
2870   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2871 
2872   unsigned SEW = MI.getOperand(SEWIndex).getImm();
2873   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
2874   RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));
2875 
2876   MachineRegisterInfo &MRI = MF.getRegInfo();
2877 
2878   // VL and VTYPE are alive here.
2879   MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI));
2880 
2881   if (VLIndex >= 0) {
2882     // Set VL (rs1 != X0).
2883     Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
2884     MIB.addReg(DestReg, RegState::Define | RegState::Dead)
2885         .addReg(MI.getOperand(VLIndex).getReg());
2886   } else
2887     // With no VL operand in the pseudo, do not modify VL (rd = X0, rs1 = X0).
2888     MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead)
2889         .addReg(RISCV::X0, RegState::Kill);
2890 
2891   // Default to tail agnostic unless the destination is tied to a source. In
2892   // that case the user would have some control over the tail values. The tail
2893   // policy is also ignored on instructions that only update element 0, such as
2894   // vmv.s.x or reductions, so use agnostic there to match the common case.
2895   // FIXME: This is conservatively correct, but we might want to detect that
2896   // the input is undefined.
2897   bool TailAgnostic = true;
2898   unsigned UseOpIdx;
2899   if (MI.isRegTiedToUseOperand(0, &UseOpIdx) && !WritesElement0) {
2900     TailAgnostic = false;
2901     // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
2902     const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
2903     MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
2904     if (UseMI && UseMI->isImplicitDef())
2905       TailAgnostic = true;
2906   }
2907 
2908   // For simplicity we reuse the vtype representation here.
2909   MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
2910                                      /*TailAgnostic*/ TailAgnostic,
2911                                      /*MaskAgnostic*/ false));
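       // As a rough example (operands invented here), an unmasked LMUL=1,
       // SEW=32 pseudo with a VL operand and no tied destination produces
       // something like
       //   vsetvli %dead, %avl, e32,m1,ta,mu
       // i.e. tail agnostic, mask undisturbed.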
2912 
2913   // Remove (now) redundant operands from pseudo
2914   MI.getOperand(SEWIndex).setImm(-1);
2915   if (VLIndex >= 0) {
2916     MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
2917     MI.getOperand(VLIndex).setIsKill(false);
2918   }
2919 
2920   return BB;
2921 }
2922 
2923 MachineBasicBlock *
2924 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2925                                                  MachineBasicBlock *BB) const {
2926   uint64_t TSFlags = MI.getDesc().TSFlags;
2927 
2928   if (TSFlags & RISCVII::HasSEWOpMask) {
2929     unsigned NumOperands = MI.getNumExplicitOperands();
2930     int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
2931     unsigned SEWIndex = NumOperands - 1;
2932     bool WritesElement0 = TSFlags & RISCVII::WritesElement0Mask;
2933 
2934     RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
2935                                                RISCVII::VLMulShift);
2936     return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, WritesElement0);
2937   }
2938 
2939   switch (MI.getOpcode()) {
2940   default:
2941     llvm_unreachable("Unexpected instr type to insert");
2942   case RISCV::ReadCycleWide:
2943     assert(!Subtarget.is64Bit() &&
2944            "ReadCycleWide is only to be used on riscv32");
2945     return emitReadCycleWidePseudo(MI, BB);
2946   case RISCV::Select_GPR_Using_CC_GPR:
2947   case RISCV::Select_FPR16_Using_CC_GPR:
2948   case RISCV::Select_FPR32_Using_CC_GPR:
2949   case RISCV::Select_FPR64_Using_CC_GPR:
2950     return emitSelectPseudo(MI, BB);
2951   case RISCV::BuildPairF64Pseudo:
2952     return emitBuildPairF64Pseudo(MI, BB);
2953   case RISCV::SplitF64Pseudo:
2954     return emitSplitF64Pseudo(MI, BB);
2955   }
2956 }
2957 
2958 // Calling Convention Implementation.
2959 // The expectations for frontend ABI lowering vary from target to target.
2960 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
2961 // details, but this is a longer term goal. For now, we simply try to keep the
2962 // role of the frontend as simple and well-defined as possible. The rules can
2963 // be summarised as:
2964 // * Never split up large scalar arguments. We handle them here.
2965 // * If a hardfloat calling convention is being used, and the struct may be
2966 // passed in a pair of registers (fp+fp, int+fp), and both registers are
2967 // available, then pass as two separate arguments. If either the GPRs or FPRs
2968 // are exhausted, then pass according to the rule below.
2969 // * If a struct could never be passed in registers or directly in a stack
2970 // slot (as it is larger than 2*XLEN and the floating point rules don't
2971 // apply), then pass it using a pointer with the byval attribute.
2972 // * If a struct is no larger than 2*XLEN, then coerce to either a two-element
2973 // word-sized array or a 2*XLEN scalar (depending on alignment).
2974 // * The frontend can determine whether a struct is returned by reference or
2975 // not based on its size and fields. If it will be returned by reference, the
2976 // frontend must modify the prototype so a pointer with the sret annotation is
2977 // passed as the first argument. This is not necessary for large scalar
2978 // returns.
2979 // * Struct return values and varargs should be coerced to structs containing
2980 // register-size fields in the same situations they would be for fixed
2981 // arguments.
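     //
     // As an illustrative example (types invented for this comment), given
     //   struct S { double d; int32_t i; };
     //   void f(struct S s);
     // the hard-float ILP32D ABI may pass 'd' in an FPR and 'i' in a GPR when
     // registers of both kinds are available; once either register file is
     // exhausted, 's' is passed according to the integer rules above.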
2982 
2983 static const MCPhysReg ArgGPRs[] = {
2984   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
2985   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
2986 };
2987 static const MCPhysReg ArgFPR16s[] = {
2988   RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
2989   RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
2990 };
2991 static const MCPhysReg ArgFPR32s[] = {
2992   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
2993   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
2994 };
2995 static const MCPhysReg ArgFPR64s[] = {
2996   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
2997   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
2998 };
2999 // This is an interim calling convention and it may be changed in the future.
3000 static const MCPhysReg ArgVRs[] = {
3001     RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
3002     RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
3003     RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
3004 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
3005                                      RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
3006                                      RISCV::V20M2, RISCV::V22M2};
3007 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
3008                                      RISCV::V20M4};
3009 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
3010 
3011 // Pass a 2*XLEN argument that has been split into two XLEN values through
3012 // registers or the stack as necessary.
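     // For example (hypothetical allocation state), an i64 split into two i32
     // halves on RV32: if two argument GPRs remain, both halves go in registers;
     // if only a7 remains, the first half goes in a7 and the second half on the
     // stack; if no GPRs remain, both halves go on the stack with the required
     // alignment.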
3013 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
3014                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
3015                                 MVT ValVT2, MVT LocVT2,
3016                                 ISD::ArgFlagsTy ArgFlags2) {
3017   unsigned XLenInBytes = XLen / 8;
3018   if (Register Reg = State.AllocateReg(ArgGPRs)) {
3019     // At least one half can be passed via register.
3020     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
3021                                      VA1.getLocVT(), CCValAssign::Full));
3022   } else {
3023     // Both halves must be passed on the stack, with proper alignment.
3024     Align StackAlign =
3025         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
3026     State.addLoc(
3027         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
3028                             State.AllocateStack(XLenInBytes, StackAlign),
3029                             VA1.getLocVT(), CCValAssign::Full));
3030     State.addLoc(CCValAssign::getMem(
3031         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
3032         LocVT2, CCValAssign::Full));
3033     return false;
3034   }
3035 
3036   if (Register Reg = State.AllocateReg(ArgGPRs)) {
3037     // The second half can also be passed via register.
3038     State.addLoc(
3039         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
3040   } else {
3041     // The second half is passed via the stack, without additional alignment.
3042     State.addLoc(CCValAssign::getMem(
3043         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
3044         LocVT2, CCValAssign::Full));
3045   }
3046 
3047   return false;
3048 }
3049 
3050 // Implements the RISC-V calling convention. Returns true upon failure.
3051 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
3052                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
3053                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
3054                      bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
3055                      Optional<unsigned> FirstMaskArgument) {
3056   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
3057   assert(XLen == 32 || XLen == 64);
3058   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
3059 
3060   // Any return value split into more than two values can't be returned
3061   // directly.
3062   if (IsRet && ValNo > 1)
3063     return true;
3064 
3065   // UseGPRForF16_F32 is true when targeting a soft-float ABI, when passing a
3066   // variadic argument, or when no F16/F32 argument registers are available.
3067   bool UseGPRForF16_F32 = true;
3068   // UseGPRForF64 is true when targeting a soft-float or FLEN=32 ABI, when
3069   // passing a variadic argument, or when no F64 argument registers are available.
3070   bool UseGPRForF64 = true;
3071 
3072   switch (ABI) {
3073   default:
3074     llvm_unreachable("Unexpected ABI");
3075   case RISCVABI::ABI_ILP32:
3076   case RISCVABI::ABI_LP64:
3077     break;
3078   case RISCVABI::ABI_ILP32F:
3079   case RISCVABI::ABI_LP64F:
3080     UseGPRForF16_F32 = !IsFixed;
3081     break;
3082   case RISCVABI::ABI_ILP32D:
3083   case RISCVABI::ABI_LP64D:
3084     UseGPRForF16_F32 = !IsFixed;
3085     UseGPRForF64 = !IsFixed;
3086     break;
3087   }
3088 
3089   // FPR16, FPR32, and FPR64 alias each other.
3090   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
3091     UseGPRForF16_F32 = true;
3092     UseGPRForF64 = true;
3093   }
3094 
3095   // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
3096   // similar local variables rather than directly checking against the target
3097   // ABI.
3098 
3099   if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
3100     LocVT = XLenVT;
3101     LocInfo = CCValAssign::BCvt;
3102   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
3103     LocVT = MVT::i64;
3104     LocInfo = CCValAssign::BCvt;
3105   }
3106 
3107   // If this is a variadic argument, the RISC-V calling convention requires
3108   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
3109   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
3110   // be used regardless of whether the original argument was split during
3111   // legalisation or not. The argument will not be passed by registers if the
3112   // original type is larger than 2*XLEN, so the register alignment rule does
3113   // not apply.
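       // For example, on RV32 a variadic double that follows a single fixed
       // argument in a0 skips a1 and is passed in the aligned pair a2+a3.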
3114   unsigned TwoXLenInBytes = (2 * XLen) / 8;
3115   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
3116       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
3117     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
3118     // Skip 'odd' register if necessary.
3119     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
3120       State.AllocateReg(ArgGPRs);
3121   }
3122 
3123   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
3124   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
3125       State.getPendingArgFlags();
3126 
3127   assert(PendingLocs.size() == PendingArgFlags.size() &&
3128          "PendingLocs and PendingArgFlags out of sync");
3129 
3130   // Handle passing f64 on RV32D with a soft float ABI or when floating point
3131   // registers are exhausted.
3132   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
3133     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
3134            "Can't lower f64 if it is split");
3135     // Depending on available argument GPRs, f64 may be passed in a pair of
3136     // GPRs, split between a GPR and the stack, or passed completely on the
3137     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
3138     // cases.
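         // For example, if only a7 is still free, the low half is assigned to
         // a7 and a 4-byte stack slot is reserved for the high half.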
3139     Register Reg = State.AllocateReg(ArgGPRs);
3140     LocVT = MVT::i32;
3141     if (!Reg) {
3142       unsigned StackOffset = State.AllocateStack(8, Align(8));
3143       State.addLoc(
3144           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3145       return false;
3146     }
3147     if (!State.AllocateReg(ArgGPRs))
3148       State.AllocateStack(4, Align(4));
3149     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3150     return false;
3151   }
3152 
3153   // Split arguments might be passed indirectly, so keep track of the pending
3154   // values.
3155   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
3156     LocVT = XLenVT;
3157     LocInfo = CCValAssign::Indirect;
3158     PendingLocs.push_back(
3159         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
3160     PendingArgFlags.push_back(ArgFlags);
3161     if (!ArgFlags.isSplitEnd()) {
3162       return false;
3163     }
3164   }
3165 
3166   // If the split argument only had two elements, it should be passed directly
3167   // in registers or on the stack.
3168   if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
3169     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3170     // Apply the normal calling convention rules to the first half of the
3171     // split argument.
3172     CCValAssign VA = PendingLocs[0];
3173     ISD::ArgFlagsTy AF = PendingArgFlags[0];
3174     PendingLocs.clear();
3175     PendingArgFlags.clear();
3176     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
3177                                ArgFlags);
3178   }
3179 
3180   // Allocate to a register if possible, or else a stack slot.
3181   Register Reg;
3182   if (ValVT == MVT::f16 && !UseGPRForF16_F32)
3183     Reg = State.AllocateReg(ArgFPR16s);
3184   else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
3185     Reg = State.AllocateReg(ArgFPR32s);
3186   else if (ValVT == MVT::f64 && !UseGPRForF64)
3187     Reg = State.AllocateReg(ArgFPR64s);
3188   else if (ValVT.isScalableVector()) {
3189     const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
3190     if (RC == &RISCV::VRRegClass) {
3191       // Assign the first mask argument to V0.
3192       // This is an interim calling convention and it may be changed in the
3193       // future.
3194       if (FirstMaskArgument.hasValue() &&
3195           ValNo == FirstMaskArgument.getValue()) {
3196         Reg = State.AllocateReg(RISCV::V0);
3197       } else {
3198         Reg = State.AllocateReg(ArgVRs);
3199       }
3200     } else if (RC == &RISCV::VRM2RegClass) {
3201       Reg = State.AllocateReg(ArgVRM2s);
3202     } else if (RC == &RISCV::VRM4RegClass) {
3203       Reg = State.AllocateReg(ArgVRM4s);
3204     } else if (RC == &RISCV::VRM8RegClass) {
3205       Reg = State.AllocateReg(ArgVRM8s);
3206     } else {
3207       llvm_unreachable("Unhandled register class for ValueType");
3208     }
3209     if (!Reg) {
3210       LocInfo = CCValAssign::Indirect;
3211       // Try using a GPR to pass the address
3212       Reg = State.AllocateReg(ArgGPRs);
3213       LocVT = XLenVT;
3214     }
3215   } else
3216     Reg = State.AllocateReg(ArgGPRs);
3217   unsigned StackOffset =
3218       Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));
3219 
3220   // If we reach this point and PendingLocs is non-empty, we must be at the
3221   // end of a split argument that must be passed indirectly.
3222   if (!PendingLocs.empty()) {
3223     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3224     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3225 
3226     for (auto &It : PendingLocs) {
3227       if (Reg)
3228         It.convertToReg(Reg);
3229       else
3230         It.convertToMem(StackOffset);
3231       State.addLoc(It);
3232     }
3233     PendingLocs.clear();
3234     PendingArgFlags.clear();
3235     return false;
3236   }
3237 
3238   assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
3239           (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) &&
3240          "Expected an XLenVT or scalable vector types at this stage");
3241 
3242   if (Reg) {
3243     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3244     return false;
3245   }
3246 
3247   // When a floating-point value is passed on the stack, no bit-conversion is
3248   // needed.
3249   if (ValVT.isFloatingPoint()) {
3250     LocVT = ValVT;
3251     LocInfo = CCValAssign::Full;
3252   }
3253   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3254   return false;
3255 }
3256 
3257 template <typename ArgTy>
3258 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
3259   for (const auto &ArgIdx : enumerate(Args)) {
3260     MVT ArgVT = ArgIdx.value().VT;
3261     if (ArgVT.isScalableVector() &&
3262         ArgVT.getVectorElementType().SimpleTy == MVT::i1)
3263       return ArgIdx.index();
3264   }
3265   return None;
3266 }
3267 
3268 void RISCVTargetLowering::analyzeInputArgs(
3269     MachineFunction &MF, CCState &CCInfo,
3270     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
3271   unsigned NumArgs = Ins.size();
3272   FunctionType *FType = MF.getFunction().getFunctionType();
3273 
3274   Optional<unsigned> FirstMaskArgument;
3275   if (Subtarget.hasStdExtV())
3276     FirstMaskArgument = preAssignMask(Ins);
3277 
3278   for (unsigned i = 0; i != NumArgs; ++i) {
3279     MVT ArgVT = Ins[i].VT;
3280     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
3281 
3282     Type *ArgTy = nullptr;
3283     if (IsRet)
3284       ArgTy = FType->getReturnType();
3285     else if (Ins[i].isOrigArg())
3286       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
3287 
3288     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
3289     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
3290                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
3291                  FirstMaskArgument)) {
3292       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
3293                         << EVT(ArgVT).getEVTString() << '\n');
3294       llvm_unreachable(nullptr);
3295     }
3296   }
3297 }
3298 
3299 void RISCVTargetLowering::analyzeOutputArgs(
3300     MachineFunction &MF, CCState &CCInfo,
3301     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3302     CallLoweringInfo *CLI) const {
3303   unsigned NumArgs = Outs.size();
3304 
3305   Optional<unsigned> FirstMaskArgument;
3306   if (Subtarget.hasStdExtV())
3307     FirstMaskArgument = preAssignMask(Outs);
3308 
3309   for (unsigned i = 0; i != NumArgs; i++) {
3310     MVT ArgVT = Outs[i].VT;
3311     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3312     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3313 
3314     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
3315     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
3316                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
3317                  FirstMaskArgument)) {
3318       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
3319                         << EVT(ArgVT).getEVTString() << "\n");
3320       llvm_unreachable(nullptr);
3321     }
3322   }
3323 }
3324 
3325 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
3326 // values.
3327 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3328                                    const CCValAssign &VA, const SDLoc &DL) {
3329   switch (VA.getLocInfo()) {
3330   default:
3331     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3332   case CCValAssign::Full:
3333     break;
3334   case CCValAssign::BCvt:
3335     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
3336       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
3337     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3338       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
3339     else
3340       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3341     break;
3342   }
3343   return Val;
3344 }
3345 
3346 // The caller is responsible for loading the full value if the argument is
3347 // passed with CCValAssign::Indirect.
3348 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3349                                 const CCValAssign &VA, const SDLoc &DL,
3350                                 const RISCVTargetLowering &TLI) {
3351   MachineFunction &MF = DAG.getMachineFunction();
3352   MachineRegisterInfo &RegInfo = MF.getRegInfo();
3353   EVT LocVT = VA.getLocVT();
3354   SDValue Val;
3355   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
3356   Register VReg = RegInfo.createVirtualRegister(RC);
3357   RegInfo.addLiveIn(VA.getLocReg(), VReg);
3358   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
3359 
3360   if (VA.getLocInfo() == CCValAssign::Indirect)
3361     return Val;
3362 
3363   return convertLocVTToValVT(DAG, Val, VA, DL);
3364 }
3365 
3366 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
3367                                    const CCValAssign &VA, const SDLoc &DL) {
3368   EVT LocVT = VA.getLocVT();
3369 
3370   switch (VA.getLocInfo()) {
3371   default:
3372     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3373   case CCValAssign::Full:
3374     break;
3375   case CCValAssign::BCvt:
3376     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
3377       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
3378     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3379       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
3380     else
3381       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
3382     break;
3383   }
3384   return Val;
3385 }
3386 
3387 // The caller is responsible for loading the full value if the argument is
3388 // passed with CCValAssign::Indirect.
3389 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3390                                 const CCValAssign &VA, const SDLoc &DL) {
3391   MachineFunction &MF = DAG.getMachineFunction();
3392   MachineFrameInfo &MFI = MF.getFrameInfo();
3393   EVT LocVT = VA.getLocVT();
3394   EVT ValVT = VA.getValVT();
3395   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
3396   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3397                                  VA.getLocMemOffset(), /*Immutable=*/true);
3398   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3399   SDValue Val;
3400 
3401   ISD::LoadExtType ExtType;
3402   switch (VA.getLocInfo()) {
3403   default:
3404     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3405   case CCValAssign::Full:
3406   case CCValAssign::Indirect:
3407   case CCValAssign::BCvt:
3408     ExtType = ISD::NON_EXTLOAD;
3409     break;
3410   }
3411   Val = DAG.getExtLoad(
3412       ExtType, DL, LocVT, Chain, FIN,
3413       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3414   return Val;
3415 }
3416 
3417 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
3418                                        const CCValAssign &VA, const SDLoc &DL) {
3419   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
3420          "Unexpected VA");
3421   MachineFunction &MF = DAG.getMachineFunction();
3422   MachineFrameInfo &MFI = MF.getFrameInfo();
3423   MachineRegisterInfo &RegInfo = MF.getRegInfo();
3424 
3425   if (VA.isMemLoc()) {
3426     // f64 is passed on the stack.
3427     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
3428     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3429     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
3430                        MachinePointerInfo::getFixedStack(MF, FI));
3431   }
3432 
3433   assert(VA.isRegLoc() && "Expected register VA assignment");
3434 
3435   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3436   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
3437   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
3438   SDValue Hi;
3439   if (VA.getLocReg() == RISCV::X17) {
3440     // Second half of f64 is passed on the stack.
3441     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
3442     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3443     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
3444                      MachinePointerInfo::getFixedStack(MF, FI));
3445   } else {
3446     // Second half of f64 is passed in another GPR.
3447     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3448     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
3449     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
3450   }
3451   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
3452 }
3453 
3454 // FastCC gives less than a 1% performance improvement on some particular
3455 // benchmarks, but in theory it may benefit other cases.
3456 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
3457                             CCValAssign::LocInfo LocInfo,
3458                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
3459 
3460   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3461     // X5 and X6 might be used for save-restore libcall.
3462     static const MCPhysReg GPRList[] = {
3463         RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
3464         RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
3465         RISCV::X29, RISCV::X30, RISCV::X31};
3466     if (unsigned Reg = State.AllocateReg(GPRList)) {
3467       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3468       return false;
3469     }
3470   }
3471 
3472   if (LocVT == MVT::f16) {
3473     static const MCPhysReg FPR16List[] = {
3474         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
3475         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
3476         RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
3477         RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
3478     if (unsigned Reg = State.AllocateReg(FPR16List)) {
3479       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3480       return false;
3481     }
3482   }
3483 
3484   if (LocVT == MVT::f32) {
3485     static const MCPhysReg FPR32List[] = {
3486         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
3487         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
3488         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
3489         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
3490     if (unsigned Reg = State.AllocateReg(FPR32List)) {
3491       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3492       return false;
3493     }
3494   }
3495 
3496   if (LocVT == MVT::f64) {
3497     static const MCPhysReg FPR64List[] = {
3498         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
3499         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
3500         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
3501         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
3502     if (unsigned Reg = State.AllocateReg(FPR64List)) {
3503       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3504       return false;
3505     }
3506   }
3507 
3508   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
3509     unsigned Offset4 = State.AllocateStack(4, Align(4));
3510     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
3511     return false;
3512   }
3513 
3514   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
3515     unsigned Offset5 = State.AllocateStack(8, Align(8));
3516     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
3517     return false;
3518   }
3519 
3520   return true; // CC didn't match.
3521 }
3522 
3523 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
3524                          CCValAssign::LocInfo LocInfo,
3525                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
3526 
3527   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3528     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
3529     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
3530     static const MCPhysReg GPRList[] = {
3531         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
3532         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
3533     if (unsigned Reg = State.AllocateReg(GPRList)) {
3534       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3535       return false;
3536     }
3537   }
3538 
3539   if (LocVT == MVT::f32) {
3540     // Pass in STG registers: F1, ..., F6
3541     //                        fs0 ... fs5
3542     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
3543                                           RISCV::F18_F, RISCV::F19_F,
3544                                           RISCV::F20_F, RISCV::F21_F};
3545     if (unsigned Reg = State.AllocateReg(FPR32List)) {
3546       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3547       return false;
3548     }
3549   }
3550 
3551   if (LocVT == MVT::f64) {
3552     // Pass in STG registers: D1, ..., D6
3553     //                        fs6 ... fs11
3554     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
3555                                           RISCV::F24_D, RISCV::F25_D,
3556                                           RISCV::F26_D, RISCV::F27_D};
3557     if (unsigned Reg = State.AllocateReg(FPR64List)) {
3558       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3559       return false;
3560     }
3561   }
3562 
3563   report_fatal_error("No registers left in GHC calling convention");
3564   return true;
3565 }
3566 
3567 // Transform physical registers into virtual registers.
3568 SDValue RISCVTargetLowering::LowerFormalArguments(
3569     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3570     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3571     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3572 
3573   MachineFunction &MF = DAG.getMachineFunction();
3574 
3575   switch (CallConv) {
3576   default:
3577     report_fatal_error("Unsupported calling convention");
3578   case CallingConv::C:
3579   case CallingConv::Fast:
3580     break;
3581   case CallingConv::GHC:
3582     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
3583         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
3584       report_fatal_error(
3585         "GHC calling convention requires the F and D instruction set extensions");
3586   }
3587 
3588   const Function &Func = MF.getFunction();
3589   if (Func.hasFnAttribute("interrupt")) {
3590     if (!Func.arg_empty())
3591       report_fatal_error(
3592         "Functions with the interrupt attribute cannot have arguments!");
3593 
3594     StringRef Kind =
3595       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
3596 
3597     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
3598       report_fatal_error(
3599         "Function interrupt attribute argument not supported!");
3600   }
3601 
3602   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3603   MVT XLenVT = Subtarget.getXLenVT();
3604   unsigned XLenInBytes = Subtarget.getXLen() / 8;
3605   // Used with varargs to accumulate store chains.
3606   std::vector<SDValue> OutChains;
3607 
3608   // Assign locations to all of the incoming arguments.
3609   SmallVector<CCValAssign, 16> ArgLocs;
3610   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3611 
3612   if (CallConv == CallingConv::Fast)
3613     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
3614   else if (CallConv == CallingConv::GHC)
3615     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
3616   else
3617     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
3618 
3619   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3620     CCValAssign &VA = ArgLocs[i];
3621     SDValue ArgValue;
3622     // Passing f64 on RV32D with a soft float ABI must be handled as a special
3623     // case.
3624     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
3625       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
3626     else if (VA.isRegLoc())
3627       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3628     else
3629       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3630 
3631     if (VA.getLocInfo() == CCValAssign::Indirect) {
3632       // If the original argument was split and passed by reference (e.g. i128
3633       // on RV32), we need to load all parts of it here (using the same
3634       // address).
3635       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3636                                    MachinePointerInfo()));
3637       unsigned ArgIndex = Ins[i].OrigArgIndex;
3638       assert(Ins[i].PartOffset == 0);
3639       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3640         CCValAssign &PartVA = ArgLocs[i + 1];
3641         unsigned PartOffset = Ins[i + 1].PartOffset;
3642         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
3643                                       DAG.getIntPtrConstant(PartOffset, DL));
3644         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3645                                      MachinePointerInfo()));
3646         ++i;
3647       }
3648       continue;
3649     }
3650     InVals.push_back(ArgValue);
3651   }
3652 
3653   if (IsVarArg) {
3654     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
3655     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3656     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
3657     MachineFrameInfo &MFI = MF.getFrameInfo();
3658     MachineRegisterInfo &RegInfo = MF.getRegInfo();
3659     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
3660 
3661     // Offset of the first variable argument from stack pointer, and size of
3662     // the vararg save area. For now, the varargs save area is either zero or
3663     // large enough to hold a0-a7.
3664     int VaArgOffset, VarArgsSaveSize;
3665 
3666     // If all registers are allocated, then all varargs must be passed on the
3667     // stack and we don't need to save any argregs.
3668     if (ArgRegs.size() == Idx) {
3669       VaArgOffset = CCInfo.getNextStackOffset();
3670       VarArgsSaveSize = 0;
3671     } else {
3672       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
3673       VaArgOffset = -VarArgsSaveSize;
3674     }
3675 
3676     // Record the frame index of the first variable argument,
3677     // which is needed by VASTART.
3678     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
3679     RVFI->setVarArgsFrameIndex(FI);
3680 
3681     // If saving an odd number of registers then create an extra stack slot to
3682     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
3683     // offsets to even-numbered registers remain 2*XLEN-aligned.
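         // For example, on RV32 with three fixed arguments in a0-a2, registers
         // a3-a7 are saved (5 * 4 = 20 bytes) plus one 4-byte padding slot since
         // Idx is odd, giving a 24-byte save area.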
3684     if (Idx % 2) {
3685       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
3686       VarArgsSaveSize += XLenInBytes;
3687     }
3688 
3689     // Copy the integer registers that may have been used for passing varargs
3690     // to the vararg save area.
3691     for (unsigned I = Idx; I < ArgRegs.size();
3692          ++I, VaArgOffset += XLenInBytes) {
3693       const Register Reg = RegInfo.createVirtualRegister(RC);
3694       RegInfo.addLiveIn(ArgRegs[I], Reg);
3695       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
3696       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
3697       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3698       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
3699                                    MachinePointerInfo::getFixedStack(MF, FI));
3700       cast<StoreSDNode>(Store.getNode())
3701           ->getMemOperand()
3702           ->setValue((Value *)nullptr);
3703       OutChains.push_back(Store);
3704     }
3705     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
3706   }
3707 
3708   // All stores are grouped in one node to allow the matching between
3709   // the size of Ins and InVals. This only happens for vararg functions.
3710   if (!OutChains.empty()) {
3711     OutChains.push_back(Chain);
3712     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3713   }
3714 
3715   return Chain;
3716 }
3717 
3718 /// isEligibleForTailCallOptimization - Check whether the call is eligible
3719 /// for tail call optimization.
3720 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
3721 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
3722     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
3723     const SmallVector<CCValAssign, 16> &ArgLocs) const {
3724 
3725   auto &Callee = CLI.Callee;
3726   auto CalleeCC = CLI.CallConv;
3727   auto &Outs = CLI.Outs;
3728   auto &Caller = MF.getFunction();
3729   auto CallerCC = Caller.getCallingConv();
3730 
3731   // Exception-handling functions need a special set of instructions to
3732   // indicate a return to the hardware. Tail-calling another function would
3733   // probably break this.
3734   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
3735   // should be expanded as new function attributes are introduced.
3736   if (Caller.hasFnAttribute("interrupt"))
3737     return false;
3738 
3739   // Do not tail call opt if the stack is used to pass parameters.
3740   if (CCInfo.getNextStackOffset() != 0)
3741     return false;
3742 
3743   // Do not tail call opt if any parameters need to be passed indirectly.
3744   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
3745   // passed indirectly. So the address of the value will be passed in a
3746   // register, or if not available, then the address is put on the stack. In
3747   // order to pass indirectly, space on the stack often needs to be allocated
3748   // in order to store the value. In this case the CCInfo.getNextStackOffset()
3749   // != 0 check is not enough; we also need to check whether any entries in
3750   // ArgLocs are passed CCValAssign::Indirect.
3751   for (auto &VA : ArgLocs)
3752     if (VA.getLocInfo() == CCValAssign::Indirect)
3753       return false;
3754 
3755   // Do not tail call opt if either caller or callee uses struct return
3756   // semantics.
3757   auto IsCallerStructRet = Caller.hasStructRetAttr();
3758   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3759   if (IsCallerStructRet || IsCalleeStructRet)
3760     return false;
3761 
3762   // Externally-defined functions with weak linkage should not be
3763   // tail-called. The behaviour of branch instructions in this situation (as
3764   // used for tail calls) is implementation-defined, so we cannot rely on the
3765   // linker replacing the tail call with a return.
3766   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3767     const GlobalValue *GV = G->getGlobal();
3768     if (GV->hasExternalWeakLinkage())
3769       return false;
3770   }
3771 
3772   // The callee has to preserve all registers the caller needs to preserve.
3773   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
3774   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3775   if (CalleeCC != CallerCC) {
3776     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3777     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3778       return false;
3779   }
3780 
3781   // Byval parameters hand the function a pointer directly into the stack area
3782   // we want to reuse during a tail call. Working around this *is* possible
3783   // but less efficient and uglier in LowerCall.
3784   for (auto &Arg : Outs)
3785     if (Arg.Flags.isByVal())
3786       return false;
3787 
3788   return true;
3789 }
3790 
3791 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
3792 // and output parameter nodes.
3793 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
3794                                        SmallVectorImpl<SDValue> &InVals) const {
3795   SelectionDAG &DAG = CLI.DAG;
3796   SDLoc &DL = CLI.DL;
3797   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3798   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3799   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3800   SDValue Chain = CLI.Chain;
3801   SDValue Callee = CLI.Callee;
3802   bool &IsTailCall = CLI.IsTailCall;
3803   CallingConv::ID CallConv = CLI.CallConv;
3804   bool IsVarArg = CLI.IsVarArg;
3805   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3806   MVT XLenVT = Subtarget.getXLenVT();
3807 
3808   MachineFunction &MF = DAG.getMachineFunction();
3809 
3810   // Analyze the operands of the call, assigning locations to each operand.
3811   SmallVector<CCValAssign, 16> ArgLocs;
3812   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3813 
3814   if (CallConv == CallingConv::Fast)
3815     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
3816   else if (CallConv == CallingConv::GHC)
3817     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
3818   else
3819     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
3820 
3821   // Check if it's really possible to do a tail call.
3822   if (IsTailCall)
3823     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
3824 
3825   if (IsTailCall)
3826     ++NumTailCalls;
3827   else if (CLI.CB && CLI.CB->isMustTailCall())
3828     report_fatal_error("failed to perform tail call elimination on a call "
3829                        "site marked musttail");
3830 
3831   // Get a count of how many bytes are to be pushed on the stack.
3832   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
3833 
3834   // Create local copies for byval args
3835   SmallVector<SDValue, 8> ByValArgs;
3836   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3837     ISD::ArgFlagsTy Flags = Outs[i].Flags;
3838     if (!Flags.isByVal())
3839       continue;
3840 
3841     SDValue Arg = OutVals[i];
3842     unsigned Size = Flags.getByValSize();
3843     Align Alignment = Flags.getNonZeroByValAlign();
3844 
3845     int FI =
3846         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
3847     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3848     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
3849 
3850     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
3851                           /*IsVolatile=*/false,
3852                           /*AlwaysInline=*/false, IsTailCall,
3853                           MachinePointerInfo(), MachinePointerInfo());
3854     ByValArgs.push_back(FIPtr);
3855   }
3856 
3857   if (!IsTailCall)
3858     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
3859 
3860   // Copy argument values to their designated locations.
3861   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
3862   SmallVector<SDValue, 8> MemOpChains;
3863   SDValue StackPtr;
3864   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
3865     CCValAssign &VA = ArgLocs[i];
3866     SDValue ArgValue = OutVals[i];
3867     ISD::ArgFlagsTy Flags = Outs[i].Flags;
3868 
3869     // Handle passing f64 on RV32D with a soft float ABI as a special case.
3870     bool IsF64OnRV32DSoftABI =
3871         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
3872     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
3873       SDValue SplitF64 = DAG.getNode(
3874           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
3875       SDValue Lo = SplitF64.getValue(0);
3876       SDValue Hi = SplitF64.getValue(1);
3877 
3878       Register RegLo = VA.getLocReg();
3879       RegsToPass.push_back(std::make_pair(RegLo, Lo));
3880 
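      // X17 (a7) is the last GPR used for argument passing, so no paired
      // register is available for the high half in that case.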
3881       if (RegLo == RISCV::X17) {
3882         // Second half of f64 is passed on the stack.
3883         // Work out the address of the stack slot.
3884         if (!StackPtr.getNode())
3885           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
3886         // Emit the store.
3887         MemOpChains.push_back(
3888             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
3889       } else {
3890         // Second half of f64 is passed in another GPR.
3891         assert(RegLo < RISCV::X31 && "Invalid register pair");
3892         Register RegHigh = RegLo + 1;
3893         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
3894       }
3895       continue;
3896     }
3897 
3898     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
3899     // as any other MemLoc.
3900 
3901     // Promote the value if needed.
3902     // For now, only handle fully promoted and indirect arguments.
3903     if (VA.getLocInfo() == CCValAssign::Indirect) {
3904       // Store the argument in a stack slot and pass its address.
3905       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
3906       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3907       MemOpChains.push_back(
3908           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
3909                        MachinePointerInfo::getFixedStack(MF, FI)));
3910       // If the original argument was split (e.g. i128), we need
3911       // to store all parts of it here (and pass just one address).
3912       unsigned ArgIndex = Outs[i].OrigArgIndex;
3913       assert(Outs[i].PartOffset == 0);
3914       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
3915         SDValue PartValue = OutVals[i + 1];
3916         unsigned PartOffset = Outs[i + 1].PartOffset;
3917         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
3918                                       DAG.getIntPtrConstant(PartOffset, DL));
3919         MemOpChains.push_back(
3920             DAG.getStore(Chain, DL, PartValue, Address,
3921                          MachinePointerInfo::getFixedStack(MF, FI)));
3922         ++i;
3923       }
3924       ArgValue = SpillSlot;
3925     } else {
3926       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
3927     }
3928 
3929     // Use local copy if it is a byval arg.
3930     if (Flags.isByVal())
3931       ArgValue = ByValArgs[j++];
3932 
3933     if (VA.isRegLoc()) {
3934       // Queue up the argument copies and emit them at the end.
3935       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
3936     } else {
3937       assert(VA.isMemLoc() && "Argument not register or memory");
3938       assert(!IsTailCall && "Tail call not allowed if stack is used "
3939                             "for passing parameters");
3940 
3941       // Work out the address of the stack slot.
3942       if (!StackPtr.getNode())
3943         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
3944       SDValue Address =
3945           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
3946                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
3947 
3948       // Emit the store.
3949       MemOpChains.push_back(
3950           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
3951     }
3952   }
3953 
3954   // Join the stores, which are independent of one another.
3955   if (!MemOpChains.empty())
3956     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
3957 
3958   SDValue Glue;
3959 
3960   // Build a sequence of copy-to-reg nodes, chained and glued together.
3961   for (auto &Reg : RegsToPass) {
3962     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
3963     Glue = Chain.getValue(1);
3964   }
3965 
  // Validate that none of the argument registers have been marked as
  // reserved; if so, report an error. Do the same for the return address if
  // this is not a tail call.
3969   validateCCReservedRegs(RegsToPass, MF);
3970   if (!IsTailCall &&
3971       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
3972     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
3973         MF.getFunction(),
3974         "Return address register required, but has been reserved."});
3975 
3976   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
3977   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
3978   // split it and then direct call can be matched by PseudoCALL.
3979   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
3980     const GlobalValue *GV = S->getGlobal();
3981 
3982     unsigned OpFlags = RISCVII::MO_CALL;
3983     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
3984       OpFlags = RISCVII::MO_PLT;
3985 
3986     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
3987   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3988     unsigned OpFlags = RISCVII::MO_CALL;
3989 
3990     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
3991                                                  nullptr))
3992       OpFlags = RISCVII::MO_PLT;
3993 
3994     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
3995   }
3996 
3997   // The first call operand is the chain and the second is the target address.
3998   SmallVector<SDValue, 8> Ops;
3999   Ops.push_back(Chain);
4000   Ops.push_back(Callee);
4001 
4002   // Add argument registers to the end of the list so that they are
4003   // known live into the call.
4004   for (auto &Reg : RegsToPass)
4005     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
4006 
4007   if (!IsTailCall) {
4008     // Add a register mask operand representing the call-preserved registers.
4009     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4010     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
4011     assert(Mask && "Missing call preserved mask for calling convention");
4012     Ops.push_back(DAG.getRegisterMask(Mask));
4013   }
4014 
4015   // Glue the call to the argument copies, if any.
4016   if (Glue.getNode())
4017     Ops.push_back(Glue);
4018 
4019   // Emit the call.
4020   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4021 
4022   if (IsTailCall) {
4023     MF.getFrameInfo().setHasTailCall();
4024     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
4025   }
4026 
4027   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
4028   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4029   Glue = Chain.getValue(1);
4030 
4031   // Mark the end of the call, which is glued to the call itself.
4032   Chain = DAG.getCALLSEQ_END(Chain,
4033                              DAG.getConstant(NumBytes, DL, PtrVT, true),
4034                              DAG.getConstant(0, DL, PtrVT, true),
4035                              Glue, DL);
4036   Glue = Chain.getValue(1);
4037 
4038   // Assign locations to each value returned by this call.
4039   SmallVector<CCValAssign, 16> RVLocs;
4040   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
4041   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
4042 
4043   // Copy all of the result registers out of their specified physreg.
4044   for (auto &VA : RVLocs) {
4045     // Copy the value out
4046     SDValue RetValue =
4047         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
4048     // Glue the RetValue to the end of the call sequence
4049     Chain = RetValue.getValue(1);
4050     Glue = RetValue.getValue(2);
4051 
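    // An f64 returned on RV32 with a soft-float ABI comes back split across a
    // GPR pair (a0/a1); fetch the high half from the second register and
    // reassemble the value with BuildPairF64.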
4052     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
4053       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
4054       SDValue RetValue2 =
4055           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
4056       Chain = RetValue2.getValue(1);
4057       Glue = RetValue2.getValue(2);
4058       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
4059                              RetValue2);
4060     }
4061 
4062     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
4063 
4064     InVals.push_back(RetValue);
4065   }
4066 
4067   return Chain;
4068 }
4069 
4070 bool RISCVTargetLowering::CanLowerReturn(
4071     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
4072     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4073   SmallVector<CCValAssign, 16> RVLocs;
4074   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
4075 
4076   Optional<unsigned> FirstMaskArgument;
4077   if (Subtarget.hasStdExtV())
4078     FirstMaskArgument = preAssignMask(Outs);
4079 
4080   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4081     MVT VT = Outs[i].VT;
4082     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4083     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
4084     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
4085                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
4086                  *this, FirstMaskArgument))
4087       return false;
4088   }
4089   return true;
4090 }
4091 
4092 SDValue
4093 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
4094                                  bool IsVarArg,
4095                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
4096                                  const SmallVectorImpl<SDValue> &OutVals,
4097                                  const SDLoc &DL, SelectionDAG &DAG) const {
4098   const MachineFunction &MF = DAG.getMachineFunction();
4099   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
4100 
4101   // Stores the assignment of the return value to a location.
4102   SmallVector<CCValAssign, 16> RVLocs;
4103 
4104   // Info about the registers and stack slot.
4105   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
4106                  *DAG.getContext());
4107 
4108   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
4109                     nullptr);
4110 
4111   if (CallConv == CallingConv::GHC && !RVLocs.empty())
4112     report_fatal_error("GHC functions return void only");
4113 
4114   SDValue Glue;
4115   SmallVector<SDValue, 4> RetOps(1, Chain);
4116 
4117   // Copy the result values into the output registers.
4118   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
4119     SDValue Val = OutVals[i];
4120     CCValAssign &VA = RVLocs[i];
4121     assert(VA.isRegLoc() && "Can only return in registers!");
4122 
4123     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
4124       // Handle returning f64 on RV32D with a soft float ABI.
4125       assert(VA.isRegLoc() && "Expected return via registers");
4126       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
4127                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
4128       SDValue Lo = SplitF64.getValue(0);
4129       SDValue Hi = SplitF64.getValue(1);
4130       Register RegLo = VA.getLocReg();
4131       assert(RegLo < RISCV::X31 && "Invalid register pair");
4132       Register RegHi = RegLo + 1;
4133 
4134       if (STI.isRegisterReservedByUser(RegLo) ||
4135           STI.isRegisterReservedByUser(RegHi))
4136         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
4137             MF.getFunction(),
4138             "Return value register required, but has been reserved."});
4139 
4140       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
4141       Glue = Chain.getValue(1);
4142       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
4143       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
4144       Glue = Chain.getValue(1);
4145       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
4146     } else {
4147       // Handle a 'normal' return.
4148       Val = convertValVTToLocVT(DAG, Val, VA, DL);
4149       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
4150 
4151       if (STI.isRegisterReservedByUser(VA.getLocReg()))
4152         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
4153             MF.getFunction(),
4154             "Return value register required, but has been reserved."});
4155 
4156       // Guarantee that all emitted copies are stuck together.
4157       Glue = Chain.getValue(1);
4158       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4159     }
4160   }
4161 
4162   RetOps[0] = Chain; // Update chain.
4163 
4164   // Add the glue node if we have it.
4165   if (Glue.getNode()) {
4166     RetOps.push_back(Glue);
4167   }
4168 
4169   // Interrupt service routines use different return instructions.
4170   const Function &Func = DAG.getMachineFunction().getFunction();
4171   if (Func.hasFnAttribute("interrupt")) {
4172     if (!Func.getReturnType()->isVoidTy())
4173       report_fatal_error(
4174           "Functions with the interrupt attribute must have void return type!");
4175 
4176     MachineFunction &MF = DAG.getMachineFunction();
4177     StringRef Kind =
4178       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
4179 
4180     unsigned RetOpc;
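    // The "interrupt" attribute value selects the return instruction; any
    // value other than "user" or "supervisor" (normally "machine") falls
    // through to mret.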
4181     if (Kind == "user")
4182       RetOpc = RISCVISD::URET_FLAG;
4183     else if (Kind == "supervisor")
4184       RetOpc = RISCVISD::SRET_FLAG;
4185     else
4186       RetOpc = RISCVISD::MRET_FLAG;
4187 
4188     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
4189   }
4190 
4191   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
4192 }
4193 
4194 void RISCVTargetLowering::validateCCReservedRegs(
4195     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
4196     MachineFunction &MF) const {
4197   const Function &F = MF.getFunction();
4198   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
4199 
4200   if (llvm::any_of(Regs, [&STI](auto Reg) {
4201         return STI.isRegisterReservedByUser(Reg.first);
4202       }))
4203     F.getContext().diagnose(DiagnosticInfoUnsupported{
4204         F, "Argument register required, but has been reserved."});
4205 }
4206 
4207 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
4208   return CI->isTailCall();
4209 }
4210 
4211 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
4212 #define NODE_NAME_CASE(NODE)                                                   \
4213   case RISCVISD::NODE:                                                         \
4214     return "RISCVISD::" #NODE;
4215   // clang-format off
4216   switch ((RISCVISD::NodeType)Opcode) {
4217   case RISCVISD::FIRST_NUMBER:
4218     break;
4219   NODE_NAME_CASE(RET_FLAG)
4220   NODE_NAME_CASE(URET_FLAG)
4221   NODE_NAME_CASE(SRET_FLAG)
4222   NODE_NAME_CASE(MRET_FLAG)
4223   NODE_NAME_CASE(CALL)
4224   NODE_NAME_CASE(SELECT_CC)
4225   NODE_NAME_CASE(BuildPairF64)
4226   NODE_NAME_CASE(SplitF64)
4227   NODE_NAME_CASE(TAIL)
4228   NODE_NAME_CASE(SLLW)
4229   NODE_NAME_CASE(SRAW)
4230   NODE_NAME_CASE(SRLW)
4231   NODE_NAME_CASE(DIVW)
4232   NODE_NAME_CASE(DIVUW)
4233   NODE_NAME_CASE(REMUW)
4234   NODE_NAME_CASE(ROLW)
4235   NODE_NAME_CASE(RORW)
4236   NODE_NAME_CASE(FSLW)
4237   NODE_NAME_CASE(FSRW)
4238   NODE_NAME_CASE(FSL)
4239   NODE_NAME_CASE(FSR)
4240   NODE_NAME_CASE(FMV_H_X)
4241   NODE_NAME_CASE(FMV_X_ANYEXTH)
4242   NODE_NAME_CASE(FMV_W_X_RV64)
4243   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
4244   NODE_NAME_CASE(READ_CYCLE_WIDE)
4245   NODE_NAME_CASE(GREVI)
4246   NODE_NAME_CASE(GREVIW)
4247   NODE_NAME_CASE(GORCI)
4248   NODE_NAME_CASE(GORCIW)
4249   NODE_NAME_CASE(VMV_X_S)
4250   NODE_NAME_CASE(SPLAT_VECTOR_I64)
4251   NODE_NAME_CASE(READ_VLENB)
4252   NODE_NAME_CASE(TRUNCATE_VECTOR)
4253   NODE_NAME_CASE(VLEFF)
4254   NODE_NAME_CASE(VLEFF_MASK)
4255   NODE_NAME_CASE(VSLIDEUP)
4256   NODE_NAME_CASE(VSLIDEDOWN)
4257   NODE_NAME_CASE(VID)
4258   NODE_NAME_CASE(VFNCVT_ROD)
4259   NODE_NAME_CASE(VECREDUCE_ADD)
4260   NODE_NAME_CASE(VECREDUCE_UMAX)
4261   NODE_NAME_CASE(VECREDUCE_SMAX)
4262   NODE_NAME_CASE(VECREDUCE_UMIN)
4263   NODE_NAME_CASE(VECREDUCE_SMIN)
4264   NODE_NAME_CASE(VECREDUCE_AND)
4265   NODE_NAME_CASE(VECREDUCE_OR)
4266   NODE_NAME_CASE(VECREDUCE_XOR)
4267   }
4268   // clang-format on
4269   return nullptr;
4270 #undef NODE_NAME_CASE
4271 }
4272 
4273 /// getConstraintType - Given a constraint letter, return the type of
4274 /// constraint it is for this target.
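/// Single-letter constraints handled here: 'f' selects a floating-point
/// register; 'I', 'J' and 'K' accept immediates (12-bit signed, zero, and
/// 5-bit unsigned, respectively); 'A' is treated as a memory operand.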
4275 RISCVTargetLowering::ConstraintType
4276 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
4277   if (Constraint.size() == 1) {
4278     switch (Constraint[0]) {
4279     default:
4280       break;
4281     case 'f':
4282       return C_RegisterClass;
4283     case 'I':
4284     case 'J':
4285     case 'K':
4286       return C_Immediate;
4287     case 'A':
4288       return C_Memory;
4289     }
4290   }
4291   return TargetLowering::getConstraintType(Constraint);
4292 }
4293 
4294 std::pair<unsigned, const TargetRegisterClass *>
4295 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
4296                                                   StringRef Constraint,
4297                                                   MVT VT) const {
4298   // First, see if this is a constraint that directly corresponds to a
4299   // RISCV register class.
4300   if (Constraint.size() == 1) {
4301     switch (Constraint[0]) {
4302     case 'r':
4303       return std::make_pair(0U, &RISCV::GPRRegClass);
4304     case 'f':
4305       if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
4306         return std::make_pair(0U, &RISCV::FPR16RegClass);
4307       if (Subtarget.hasStdExtF() && VT == MVT::f32)
4308         return std::make_pair(0U, &RISCV::FPR32RegClass);
4309       if (Subtarget.hasStdExtD() && VT == MVT::f64)
4310         return std::make_pair(0U, &RISCV::FPR64RegClass);
4311       break;
4312     default:
4313       break;
4314     }
4315   }
4316 
4317   // Clang will correctly decode the usage of register name aliases into their
4318   // official names. However, other frontends like `rustc` do not. This allows
4319   // users of these frontends to use the ABI names for registers in LLVM-style
4320   // register constraints.
4321   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
4322                                .Case("{zero}", RISCV::X0)
4323                                .Case("{ra}", RISCV::X1)
4324                                .Case("{sp}", RISCV::X2)
4325                                .Case("{gp}", RISCV::X3)
4326                                .Case("{tp}", RISCV::X4)
4327                                .Case("{t0}", RISCV::X5)
4328                                .Case("{t1}", RISCV::X6)
4329                                .Case("{t2}", RISCV::X7)
4330                                .Cases("{s0}", "{fp}", RISCV::X8)
4331                                .Case("{s1}", RISCV::X9)
4332                                .Case("{a0}", RISCV::X10)
4333                                .Case("{a1}", RISCV::X11)
4334                                .Case("{a2}", RISCV::X12)
4335                                .Case("{a3}", RISCV::X13)
4336                                .Case("{a4}", RISCV::X14)
4337                                .Case("{a5}", RISCV::X15)
4338                                .Case("{a6}", RISCV::X16)
4339                                .Case("{a7}", RISCV::X17)
4340                                .Case("{s2}", RISCV::X18)
4341                                .Case("{s3}", RISCV::X19)
4342                                .Case("{s4}", RISCV::X20)
4343                                .Case("{s5}", RISCV::X21)
4344                                .Case("{s6}", RISCV::X22)
4345                                .Case("{s7}", RISCV::X23)
4346                                .Case("{s8}", RISCV::X24)
4347                                .Case("{s9}", RISCV::X25)
4348                                .Case("{s10}", RISCV::X26)
4349                                .Case("{s11}", RISCV::X27)
4350                                .Case("{t3}", RISCV::X28)
4351                                .Case("{t4}", RISCV::X29)
4352                                .Case("{t5}", RISCV::X30)
4353                                .Case("{t6}", RISCV::X31)
4354                                .Default(RISCV::NoRegister);
4355   if (XRegFromAlias != RISCV::NoRegister)
4356     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
4357 
  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, and since we also want to match those names to the widest
  // floating-point register type available, manually select floating-point
  // registers here.
  //
  // The second name in each case is the register's ABI name, so that frontends
  // can also use ABI names in register constraint lists.
4365   if (Subtarget.hasStdExtF()) {
4366     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
4367                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
4368                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
4369                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
4370                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
4371                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
4372                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
4373                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
4374                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
4375                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
4376                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
4377                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
4378                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
4379                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
4380                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
4381                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
4382                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
4383                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
4384                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
4385                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
4386                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
4387                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
4388                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
4389                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
4390                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
4391                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
4392                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
4393                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
4394                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
4395                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
4396                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
4397                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
4398                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
4399                         .Default(RISCV::NoRegister);
4400     if (FReg != RISCV::NoRegister) {
4401       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
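      // The FPR32 and FPR64 registers overlap one-to-one, so when D is
      // available return the 64-bit view of the same physical register.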
4402       if (Subtarget.hasStdExtD()) {
4403         unsigned RegNo = FReg - RISCV::F0_F;
4404         unsigned DReg = RISCV::F0_D + RegNo;
4405         return std::make_pair(DReg, &RISCV::FPR64RegClass);
4406       }
4407       return std::make_pair(FReg, &RISCV::FPR32RegClass);
4408     }
4409   }
4410 
4411   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
4412 }
4413 
4414 unsigned
4415 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
4416   // Currently only support length 1 constraints.
4417   if (ConstraintCode.size() == 1) {
4418     switch (ConstraintCode[0]) {
4419     case 'A':
4420       return InlineAsm::Constraint_A;
4421     default:
4422       break;
4423     }
4424   }
4425 
4426   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
4427 }
4428 
4429 void RISCVTargetLowering::LowerAsmOperandForConstraint(
4430     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
4431     SelectionDAG &DAG) const {
4432   // Currently only support length 1 constraints.
4433   if (Constraint.length() == 1) {
4434     switch (Constraint[0]) {
4435     case 'I':
4436       // Validate & create a 12-bit signed immediate operand.
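      // For example, a C-level use of this constraint might look like:
      //   asm("addi %0, %1, %2" : "=r"(dst) : "r"(src), "I"(42));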
4437       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4438         uint64_t CVal = C->getSExtValue();
4439         if (isInt<12>(CVal))
4440           Ops.push_back(
4441               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
4442       }
4443       return;
4444     case 'J':
4445       // Validate & create an integer zero operand.
4446       if (auto *C = dyn_cast<ConstantSDNode>(Op))
4447         if (C->getZExtValue() == 0)
4448           Ops.push_back(
4449               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
4450       return;
4451     case 'K':
4452       // Validate & create a 5-bit unsigned immediate operand.
4453       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4454         uint64_t CVal = C->getZExtValue();
4455         if (isUInt<5>(CVal))
4456           Ops.push_back(
4457               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
4458       }
4459       return;
4460     default:
4461       break;
4462     }
4463   }
4464   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
4465 }
4466 
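// These fence hooks implement a fence-based mapping for atomic loads and
// stores: a sequentially consistent load is bracketed by a leading seq_cst
// fence and a trailing acquire fence, a release-or-stronger store gets a
// leading release fence, and an acquire-or-stronger load gets a trailing
// acquire fence (see emitTrailingFence below).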
4467 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
4468                                                    Instruction *Inst,
4469                                                    AtomicOrdering Ord) const {
4470   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
4471     return Builder.CreateFence(Ord);
4472   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
4473     return Builder.CreateFence(AtomicOrdering::Release);
4474   return nullptr;
4475 }
4476 
4477 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
4478                                                     Instruction *Inst,
4479                                                     AtomicOrdering Ord) const {
4480   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
4481     return Builder.CreateFence(AtomicOrdering::Acquire);
4482   return nullptr;
4483 }
4484 
4485 TargetLowering::AtomicExpansionKind
4486 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
4487   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
4488   // point operations can't be used in an lr/sc sequence without breaking the
4489   // forward-progress guarantee.
4490   if (AI->isFloatingPointOperation())
4491     return AtomicExpansionKind::CmpXChg;
4492 
4493   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
4494   if (Size == 8 || Size == 16)
4495     return AtomicExpansionKind::MaskedIntrinsic;
4496   return AtomicExpansionKind::None;
4497 }
4498 
4499 static Intrinsic::ID
4500 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
4501   if (XLen == 32) {
4502     switch (BinOp) {
4503     default:
4504       llvm_unreachable("Unexpected AtomicRMW BinOp");
4505     case AtomicRMWInst::Xchg:
4506       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
4507     case AtomicRMWInst::Add:
4508       return Intrinsic::riscv_masked_atomicrmw_add_i32;
4509     case AtomicRMWInst::Sub:
4510       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
4511     case AtomicRMWInst::Nand:
4512       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
4513     case AtomicRMWInst::Max:
4514       return Intrinsic::riscv_masked_atomicrmw_max_i32;
4515     case AtomicRMWInst::Min:
4516       return Intrinsic::riscv_masked_atomicrmw_min_i32;
4517     case AtomicRMWInst::UMax:
4518       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
4519     case AtomicRMWInst::UMin:
4520       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
4521     }
4522   }
4523 
4524   if (XLen == 64) {
4525     switch (BinOp) {
4526     default:
4527       llvm_unreachable("Unexpected AtomicRMW BinOp");
4528     case AtomicRMWInst::Xchg:
4529       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
4530     case AtomicRMWInst::Add:
4531       return Intrinsic::riscv_masked_atomicrmw_add_i64;
4532     case AtomicRMWInst::Sub:
4533       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
4534     case AtomicRMWInst::Nand:
4535       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
4536     case AtomicRMWInst::Max:
4537       return Intrinsic::riscv_masked_atomicrmw_max_i64;
4538     case AtomicRMWInst::Min:
4539       return Intrinsic::riscv_masked_atomicrmw_min_i64;
4540     case AtomicRMWInst::UMax:
4541       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
4542     case AtomicRMWInst::UMin:
4543       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
4544     }
4545   }
4546 
4547   llvm_unreachable("Unexpected XLen\n");
4548 }
4549 
4550 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
4551     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
4552     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
4553   unsigned XLen = Subtarget.getXLen();
4554   Value *Ordering =
4555       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
4556   Type *Tys[] = {AlignedAddr->getType()};
4557   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
4558       AI->getModule(),
4559       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
4560 
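  // The RV64 intrinsics take XLen-wide (i64) operands, but the incoming
  // increment, mask and shift amount are narrower (i32), so sign-extend them
  // first.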
4561   if (XLen == 64) {
4562     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
4563     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4564     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
4565   }
4566 
4567   Value *Result;
4568 
4569   // Must pass the shift amount needed to sign extend the loaded value prior
4570   // to performing a signed comparison for min/max. ShiftAmt is the number of
4571   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
4572   // is the number of bits to left+right shift the value in order to
4573   // sign-extend.
4574   if (AI->getOperation() == AtomicRMWInst::Min ||
4575       AI->getOperation() == AtomicRMWInst::Max) {
4576     const DataLayout &DL = AI->getModule()->getDataLayout();
4577     unsigned ValWidth =
4578         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
4579     Value *SextShamt =
4580         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
4581     Result = Builder.CreateCall(LrwOpScwLoop,
4582                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
4583   } else {
4584     Result =
4585         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
4586   }
4587 
4588   if (XLen == 64)
4589     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4590   return Result;
4591 }
4592 
4593 TargetLowering::AtomicExpansionKind
4594 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
4595     AtomicCmpXchgInst *CI) const {
4596   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
4597   if (Size == 8 || Size == 16)
4598     return AtomicExpansionKind::MaskedIntrinsic;
4599   return AtomicExpansionKind::None;
4600 }
4601 
4602 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
4603     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
4604     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
4605   unsigned XLen = Subtarget.getXLen();
4606   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
4607   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
4608   if (XLen == 64) {
4609     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
4610     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
4611     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4612     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
4613   }
4614   Type *Tys[] = {AlignedAddr->getType()};
4615   Function *MaskedCmpXchg =
4616       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
4617   Value *Result = Builder.CreateCall(
4618       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
4619   if (XLen == 64)
4620     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4621   return Result;
4622 }
4623 
4624 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
4625                                                      EVT VT) const {
4626   VT = VT.getScalarType();
4627 
4628   if (!VT.isSimple())
4629     return false;
4630 
4631   switch (VT.getSimpleVT().SimpleTy) {
4632   case MVT::f16:
4633     return Subtarget.hasStdExtZfh();
4634   case MVT::f32:
4635     return Subtarget.hasStdExtF();
4636   case MVT::f64:
4637     return Subtarget.hasStdExtD();
4638   default:
4639     break;
4640   }
4641 
4642   return false;
4643 }
4644 
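// On entry to a landing pad, the exception pointer is expected in a0 (X10)
// and the exception selector in a1 (X11).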
4645 Register RISCVTargetLowering::getExceptionPointerRegister(
4646     const Constant *PersonalityFn) const {
4647   return RISCV::X10;
4648 }
4649 
4650 Register RISCVTargetLowering::getExceptionSelectorRegister(
4651     const Constant *PersonalityFn) const {
4652   return RISCV::X11;
4653 }
4654 
4655 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress unnecessary extensions when a libcall argument
  // or return value has f32 type under the LP64 ABI.
4658   RISCVABI::ABI ABI = Subtarget.getTargetABI();
4659   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
4660     return false;
4661 
4662   return true;
4663 }
4664 
bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
4666   if (Subtarget.is64Bit() && Type == MVT::i32)
4667     return true;
4668 
4669   return IsSigned;
4670 }
4671 
4672 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
4673                                                  SDValue C) const {
4674   // Check integral scalar types.
4675   if (VT.isScalarInteger()) {
    // Omit the optimization if the subtarget has the M extension and the data
    // size exceeds XLen.
4678     if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
4679       return false;
4680     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
4681       // Break the MUL to a SLLI and an ADD/SUB.
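      // For example, x * 9 => (x << 3) + x, x * 7 => (x << 3) - x, and
      // x * -7 => x - (x << 3).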
4682       const APInt &Imm = ConstNode->getAPIntValue();
4683       if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
4684           (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
4685         return true;
      // Omit the following optimization if the subtarget has the M extension
      // and the data size >= XLen.
4688       if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
4689         return false;
4690       // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
4691       // a pair of LUI/ADDI.
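      // For example, x * 0x8800 => ((x << 4) + x) << 11.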
4692       if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
4693         APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
4694         if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
4695             (1 - ImmS).isPowerOf2())
          return true;
4697       }
4698     }
4699   }
4700 
4701   return false;
4702 }
4703 
4704 #define GET_REGISTER_MATCHER
4705 #include "RISCVGenAsmMatcher.inc"
4706 
4707 Register
4708 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
4709                                        const MachineFunction &MF) const {
4710   Register Reg = MatchRegisterAltName(RegName);
4711   if (Reg == RISCV::NoRegister)
4712     Reg = MatchRegisterName(RegName);
4713   if (Reg == RISCV::NoRegister)
4714     report_fatal_error(
4715         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
4716   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
4717   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
4718     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
4719                              StringRef(RegName) + "\"."));
4720   return Reg;
4721 }
4722 
4723 namespace llvm {
4724 namespace RISCVVIntrinsicsTable {
4725 
4726 #define GET_RISCVVIntrinsicsTable_IMPL
4727 #include "RISCVGenSearchableTables.inc"
4728 
4729 } // namespace RISCVVIntrinsicsTable
4730 
4731 namespace RISCVZvlssegTable {
4732 
4733 #define GET_RISCVZvlssegTable_IMPL
4734 #include "RISCVGenSearchableTables.inc"
4735 
4736 } // namespace RISCVZvlssegTable
4737 } // namespace llvm
4738