1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "RISCV.h"
16 #include "RISCVMachineFunctionInfo.h"
17 #include "RISCVRegisterInfo.h"
18 #include "RISCVSubtarget.h"
19 #include "RISCVTargetMachine.h"
20 #include "Utils/RISCVMatInt.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/IR/IntrinsicsRISCV.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Support/raw_ostream.h"
37 
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "riscv-lower"
41 
42 STATISTIC(NumTailCalls, "Number of tail calls");
43 
44 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
45                                          const RISCVSubtarget &STI)
46     : TargetLowering(TM), Subtarget(STI) {
47 
48   if (Subtarget.isRV32E())
49     report_fatal_error("Codegen not yet implemented for RV32E");
50 
51   RISCVABI::ABI ABI = Subtarget.getTargetABI();
52   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
53 
54   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
55       !Subtarget.hasStdExtF()) {
56     errs() << "Hard-float 'f' ABI can't be used for a target that "
57               "doesn't support the F instruction set extension (ignoring "
58               "target-abi)\n";
59     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
60   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
61              !Subtarget.hasStdExtD()) {
62     errs() << "Hard-float 'd' ABI can't be used for a target that "
63               "doesn't support the D instruction set extension (ignoring "
64               "target-abi)\n";
65     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
66   }
67 
68   switch (ABI) {
69   default:
70     report_fatal_error("Don't know how to lower this ABI");
71   case RISCVABI::ABI_ILP32:
72   case RISCVABI::ABI_ILP32F:
73   case RISCVABI::ABI_ILP32D:
74   case RISCVABI::ABI_LP64:
75   case RISCVABI::ABI_LP64F:
76   case RISCVABI::ABI_LP64D:
77     break;
78   }
79 
80   MVT XLenVT = Subtarget.getXLenVT();
81 
82   // Set up the register classes.
83   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
84 
85   if (Subtarget.hasStdExtZfh())
86     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
87   if (Subtarget.hasStdExtF())
88     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
89   if (Subtarget.hasStdExtD())
90     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
91 
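  // RVV register classes: mask types and LMUL <= 1 integer/FP types use the
  // base VR class, while LMUL 2/4/8 types use the grouped VRM2/VRM4/VRM8
  // classes.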
92   if (Subtarget.hasStdExtV()) {
93     addRegisterClass(RISCVVMVTs::vbool64_t, &RISCV::VRRegClass);
94     addRegisterClass(RISCVVMVTs::vbool32_t, &RISCV::VRRegClass);
95     addRegisterClass(RISCVVMVTs::vbool16_t, &RISCV::VRRegClass);
96     addRegisterClass(RISCVVMVTs::vbool8_t, &RISCV::VRRegClass);
97     addRegisterClass(RISCVVMVTs::vbool4_t, &RISCV::VRRegClass);
98     addRegisterClass(RISCVVMVTs::vbool2_t, &RISCV::VRRegClass);
99     addRegisterClass(RISCVVMVTs::vbool1_t, &RISCV::VRRegClass);
100 
101     addRegisterClass(RISCVVMVTs::vint8mf8_t, &RISCV::VRRegClass);
102     addRegisterClass(RISCVVMVTs::vint8mf4_t, &RISCV::VRRegClass);
103     addRegisterClass(RISCVVMVTs::vint8mf2_t, &RISCV::VRRegClass);
104     addRegisterClass(RISCVVMVTs::vint8m1_t, &RISCV::VRRegClass);
105     addRegisterClass(RISCVVMVTs::vint8m2_t, &RISCV::VRM2RegClass);
106     addRegisterClass(RISCVVMVTs::vint8m4_t, &RISCV::VRM4RegClass);
107     addRegisterClass(RISCVVMVTs::vint8m8_t, &RISCV::VRM8RegClass);
108 
109     addRegisterClass(RISCVVMVTs::vint16mf4_t, &RISCV::VRRegClass);
110     addRegisterClass(RISCVVMVTs::vint16mf2_t, &RISCV::VRRegClass);
111     addRegisterClass(RISCVVMVTs::vint16m1_t, &RISCV::VRRegClass);
112     addRegisterClass(RISCVVMVTs::vint16m2_t, &RISCV::VRM2RegClass);
113     addRegisterClass(RISCVVMVTs::vint16m4_t, &RISCV::VRM4RegClass);
114     addRegisterClass(RISCVVMVTs::vint16m8_t, &RISCV::VRM8RegClass);
115 
116     addRegisterClass(RISCVVMVTs::vint32mf2_t, &RISCV::VRRegClass);
117     addRegisterClass(RISCVVMVTs::vint32m1_t, &RISCV::VRRegClass);
118     addRegisterClass(RISCVVMVTs::vint32m2_t, &RISCV::VRM2RegClass);
119     addRegisterClass(RISCVVMVTs::vint32m4_t, &RISCV::VRM4RegClass);
120     addRegisterClass(RISCVVMVTs::vint32m8_t, &RISCV::VRM8RegClass);
121 
122     addRegisterClass(RISCVVMVTs::vint64m1_t, &RISCV::VRRegClass);
123     addRegisterClass(RISCVVMVTs::vint64m2_t, &RISCV::VRM2RegClass);
124     addRegisterClass(RISCVVMVTs::vint64m4_t, &RISCV::VRM4RegClass);
125     addRegisterClass(RISCVVMVTs::vint64m8_t, &RISCV::VRM8RegClass);
126 
127     if (Subtarget.hasStdExtZfh()) {
128       addRegisterClass(RISCVVMVTs::vfloat16mf4_t, &RISCV::VRRegClass);
129       addRegisterClass(RISCVVMVTs::vfloat16mf2_t, &RISCV::VRRegClass);
130       addRegisterClass(RISCVVMVTs::vfloat16m1_t, &RISCV::VRRegClass);
131       addRegisterClass(RISCVVMVTs::vfloat16m2_t, &RISCV::VRM2RegClass);
132       addRegisterClass(RISCVVMVTs::vfloat16m4_t, &RISCV::VRM4RegClass);
133       addRegisterClass(RISCVVMVTs::vfloat16m8_t, &RISCV::VRM8RegClass);
134     }
135 
136     if (Subtarget.hasStdExtF()) {
137       addRegisterClass(RISCVVMVTs::vfloat32mf2_t, &RISCV::VRRegClass);
138       addRegisterClass(RISCVVMVTs::vfloat32m1_t, &RISCV::VRRegClass);
139       addRegisterClass(RISCVVMVTs::vfloat32m2_t, &RISCV::VRM2RegClass);
140       addRegisterClass(RISCVVMVTs::vfloat32m4_t, &RISCV::VRM4RegClass);
141       addRegisterClass(RISCVVMVTs::vfloat32m8_t, &RISCV::VRM8RegClass);
142     }
143 
144     if (Subtarget.hasStdExtD()) {
145       addRegisterClass(RISCVVMVTs::vfloat64m1_t, &RISCV::VRRegClass);
146       addRegisterClass(RISCVVMVTs::vfloat64m2_t, &RISCV::VRM2RegClass);
147       addRegisterClass(RISCVVMVTs::vfloat64m4_t, &RISCV::VRM4RegClass);
148       addRegisterClass(RISCVVMVTs::vfloat64m8_t, &RISCV::VRM8RegClass);
149     }
150   }
151 
152   // Compute derived properties from the register classes.
153   computeRegisterProperties(STI.getRegisterInfo());
154 
155   setStackPointerRegisterToSaveRestore(RISCV::X2);
156 
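  // There is no native i1 load; promote all flavours of extending i1 load.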
157   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
158     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
159 
160   // TODO: add all necessary setOperationAction calls.
161   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
162 
163   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
164   setOperationAction(ISD::BR_CC, XLenVT, Expand);
165   setOperationAction(ISD::SELECT, XLenVT, Custom);
166   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
167 
168   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
169   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
170 
171   setOperationAction(ISD::VASTART, MVT::Other, Custom);
172   setOperationAction(ISD::VAARG, MVT::Other, Expand);
173   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
174   setOperationAction(ISD::VAEND, MVT::Other, Expand);
175 
176   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
177   if (!Subtarget.hasStdExtZbb()) {
178     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
179     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
180   }
181 
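  // i32 is not a legal type on RV64; custom-lower the common i32 arithmetic
  // and shift operations so they can later be selected as *W instructions
  // (see customLegalizeToWOp and customLegalizeToWOpWithSExt).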
182   if (Subtarget.is64Bit()) {
183     setOperationAction(ISD::ADD, MVT::i32, Custom);
184     setOperationAction(ISD::SUB, MVT::i32, Custom);
185     setOperationAction(ISD::SHL, MVT::i32, Custom);
186     setOperationAction(ISD::SRA, MVT::i32, Custom);
187     setOperationAction(ISD::SRL, MVT::i32, Custom);
188   }
189 
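  // Without the M extension, integer multiplication and division are expanded
  // (typically to libcalls).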
190   if (!Subtarget.hasStdExtM()) {
191     setOperationAction(ISD::MUL, XLenVT, Expand);
192     setOperationAction(ISD::MULHS, XLenVT, Expand);
193     setOperationAction(ISD::MULHU, XLenVT, Expand);
194     setOperationAction(ISD::SDIV, XLenVT, Expand);
195     setOperationAction(ISD::UDIV, XLenVT, Expand);
196     setOperationAction(ISD::SREM, XLenVT, Expand);
197     setOperationAction(ISD::UREM, XLenVT, Expand);
198   }
199 
200   if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
201     setOperationAction(ISD::MUL, MVT::i32, Custom);
202     setOperationAction(ISD::SDIV, MVT::i32, Custom);
203     setOperationAction(ISD::UDIV, MVT::i32, Custom);
204     setOperationAction(ISD::UREM, MVT::i32, Custom);
205   }
206 
207   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
208   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
209   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
210   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
211 
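  // Double-XLEN shifts are lowered with custom sequences; see
  // lowerShiftLeftParts and lowerShiftRightParts.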
212   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
213   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
214   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
215 
216   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
217     if (Subtarget.is64Bit()) {
218       setOperationAction(ISD::ROTL, MVT::i32, Custom);
219       setOperationAction(ISD::ROTR, MVT::i32, Custom);
220     }
221   } else {
222     setOperationAction(ISD::ROTL, XLenVT, Expand);
223     setOperationAction(ISD::ROTR, XLenVT, Expand);
224   }
225 
226   if (Subtarget.hasStdExtZbp()) {
227     setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
228     setOperationAction(ISD::BSWAP, XLenVT, Custom);
229 
230     if (Subtarget.is64Bit()) {
231       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
232       setOperationAction(ISD::BSWAP, MVT::i32, Custom);
233     }
234   } else {
235     setOperationAction(ISD::BSWAP, XLenVT, Expand);
236   }
237 
238   if (Subtarget.hasStdExtZbb()) {
239     setOperationAction(ISD::SMIN, XLenVT, Legal);
240     setOperationAction(ISD::SMAX, XLenVT, Legal);
241     setOperationAction(ISD::UMIN, XLenVT, Legal);
242     setOperationAction(ISD::UMAX, XLenVT, Legal);
243   } else {
244     setOperationAction(ISD::CTTZ, XLenVT, Expand);
245     setOperationAction(ISD::CTLZ, XLenVT, Expand);
246     setOperationAction(ISD::CTPOP, XLenVT, Expand);
247   }
248 
249   if (Subtarget.hasStdExtZbt()) {
250     setOperationAction(ISD::FSHL, XLenVT, Legal);
251     setOperationAction(ISD::FSHR, XLenVT, Legal);
252 
253     if (Subtarget.is64Bit()) {
254       setOperationAction(ISD::FSHL, MVT::i32, Custom);
255       setOperationAction(ISD::FSHR, MVT::i32, Custom);
256     }
257   }
258 
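  // FP condition codes with no single matching RISC-V compare instruction
  // (the hardware provides FEQ/FLT/FLE); these are expanded in terms of the
  // comparisons that do exist.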
259   ISD::CondCode FPCCToExpand[] = {
260       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
261       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
262       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
263 
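  // FP operations with no hardware support in F/D/Zfh; these are expanded,
  // mostly to libcalls.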
264   ISD::NodeType FPOpToExpand[] = {
265       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
266       ISD::FP_TO_FP16};
267 
268   if (Subtarget.hasStdExtZfh())
269     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
270 
271   if (Subtarget.hasStdExtZfh()) {
272     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
273     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
274     for (auto CC : FPCCToExpand)
275       setCondCodeAction(CC, MVT::f16, Expand);
276     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
277     setOperationAction(ISD::SELECT, MVT::f16, Custom);
278     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
279     for (auto Op : FPOpToExpand)
280       setOperationAction(Op, MVT::f16, Expand);
281   }
282 
283   if (Subtarget.hasStdExtF()) {
284     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
285     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
286     for (auto CC : FPCCToExpand)
287       setCondCodeAction(CC, MVT::f32, Expand);
288     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
289     setOperationAction(ISD::SELECT, MVT::f32, Custom);
290     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
291     for (auto Op : FPOpToExpand)
292       setOperationAction(Op, MVT::f32, Expand);
293     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
294     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
295   }
296 
297   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
298     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
299 
300   if (Subtarget.hasStdExtD()) {
301     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
302     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
303     for (auto CC : FPCCToExpand)
304       setCondCodeAction(CC, MVT::f64, Expand);
305     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
306     setOperationAction(ISD::SELECT, MVT::f64, Custom);
307     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
308     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
309     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
310     for (auto Op : FPOpToExpand)
311       setOperationAction(Op, MVT::f64, Expand);
312     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
313     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
314   }
315 
316   if (Subtarget.is64Bit()) {
317     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
318     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
319     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
320     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
321   }
322 
323   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
324   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
325   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
326 
327   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
328 
329   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
330   // Unfortunately this can't be determined just from the ISA naming string.
331   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
332                      Subtarget.is64Bit() ? Legal : Custom);
333 
334   setOperationAction(ISD::TRAP, MVT::Other, Legal);
335   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
336   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
337 
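  // With the A extension, atomics up to XLEN bits are supported natively;
  // without it, all atomic operations are lowered to __atomic_* libcalls.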
338   if (Subtarget.hasStdExtA()) {
339     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
340     setMinCmpXchgSizeInBits(32);
341   } else {
342     setMaxAtomicSizeInBitsSupported(0);
343   }
344 
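  // Boolean results (e.g. from SETCC) are materialised as 0 or 1.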
345   setBooleanContents(ZeroOrOneBooleanContent);
346 
347   if (Subtarget.hasStdExtV()) {
348     setBooleanVectorContents(ZeroOrOneBooleanContent);
349 
350     // RVV intrinsics may have illegal operands.
351     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
352     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
353     if (Subtarget.is64Bit())
354       setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
355   }
356 
357   // Function alignments.
358   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
359   setMinFunctionAlignment(FunctionAlignment);
360   setPrefFunctionAlignment(FunctionAlignment);
361 
362   // Effectively disable jump table generation.
363   setMinimumJumpTableEntries(INT_MAX);
364 
365   // Jumps are expensive, compared to logic
366   setJumpIsExpensive();
367 
368   // We can use any register for comparisons
369   setHasMultipleConditionRegisters();
370 
371   if (Subtarget.hasStdExtZbp()) {
372     setTargetDAGCombine(ISD::OR);
373   }
374 }
375 
376 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
377                                             EVT VT) const {
378   if (!VT.isVector())
379     return getPointerTy(DL);
380   return VT.changeVectorElementTypeToInteger();
381 }
382 
383 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
384                                              const CallInst &I,
385                                              MachineFunction &MF,
386                                              unsigned Intrinsic) const {
387   switch (Intrinsic) {
388   default:
389     return false;
390   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
391   case Intrinsic::riscv_masked_atomicrmw_add_i32:
392   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
393   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
394   case Intrinsic::riscv_masked_atomicrmw_max_i32:
395   case Intrinsic::riscv_masked_atomicrmw_min_i32:
396   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
397   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
398   case Intrinsic::riscv_masked_cmpxchg_i32:
399     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
400     Info.opc = ISD::INTRINSIC_W_CHAIN;
401     Info.memVT = MVT::getVT(PtrTy->getElementType());
402     Info.ptrVal = I.getArgOperand(0);
403     Info.offset = 0;
404     Info.align = Align(4);
405     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
406                  MachineMemOperand::MOVolatile;
407     return true;
408   }
409 }
410 
411 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
412                                                 const AddrMode &AM, Type *Ty,
413                                                 unsigned AS,
414                                                 Instruction *I) const {
415   // No global is ever allowed as a base.
416   if (AM.BaseGV)
417     return false;
418 
419   // Require a 12-bit signed offset.
420   if (!isInt<12>(AM.BaseOffs))
421     return false;
422 
423   switch (AM.Scale) {
424   case 0: // "r+i" or just "i", depending on HasBaseReg.
425     break;
426   case 1:
427     if (!AM.HasBaseReg) // allow "r+i".
428       break;
429     return false; // disallow "r+r" or "r+r+i".
430   default:
431     return false;
432   }
433 
434   return true;
435 }
436 
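// Comparison immediates must fit the 12-bit signed immediate of SLTI/SLTIU;
// the same 12-bit constraint applies to ADDI for the add immediates below.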
437 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
438   return isInt<12>(Imm);
439 }
440 
441 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
442   return isInt<12>(Imm);
443 }
444 
445 // On RV32, 64-bit integers are split into their high and low parts and held
446 // in two different registers, so the trunc is free since the low register can
447 // just be used.
448 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
449   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
450     return false;
451   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
452   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
453   return (SrcBits == 64 && DestBits == 32);
454 }
455 
456 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
457   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
458       !SrcVT.isInteger() || !DstVT.isInteger())
459     return false;
460   unsigned SrcBits = SrcVT.getSizeInBits();
461   unsigned DestBits = DstVT.getSizeInBits();
462   return (SrcBits == 64 && DestBits == 32);
463 }
464 
465 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
466   // Zexts are free if they can be combined with a load.
467   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
468     EVT MemVT = LD->getMemoryVT();
469     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
470          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
471         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
472          LD->getExtensionType() == ISD::ZEXTLOAD))
473       return true;
474   }
475 
476   return TargetLowering::isZExtFree(Val, VT2);
477 }
478 
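// On RV64, sign-extending an i32 to i64 is effectively free (W instructions
// and LW already sign-extend), whereas zero extension needs extra work.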
479 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
480   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
481 }
482 
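// CTLZ/CTTZ are single instructions when Zbb is available, so speculating
// them is cheap.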
483 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
484   return Subtarget.hasStdExtZbb();
485 }
486 
487 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
488   return Subtarget.hasStdExtZbb();
489 }
490 
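// Only +0.0 can be materialised directly in an FP register (by moving
// x0/zero into it); other FP immediates are not considered legal here.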
491 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
492                                        bool ForCodeSize) const {
493   if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
494     return false;
495   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
496     return false;
497   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
498     return false;
499   if (Imm.isNegZero())
500     return false;
501   return Imm.isZero();
502 }
503 
504 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
505   return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
506          (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
507          (VT == MVT::f64 && Subtarget.hasStdExtD());
508 }
509 
510 // Changes the condition code and swaps operands if necessary, so the SetCC
511 // operation matches one of the comparisons supported directly in the RISC-V
512 // ISA.
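// For example, (setgt lhs, rhs) is rewritten as (setlt rhs, lhs).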
513 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
514   switch (CC) {
515   default:
516     break;
517   case ISD::SETGT:
518   case ISD::SETLE:
519   case ISD::SETUGT:
520   case ISD::SETULE:
521     CC = ISD::getSetCCSwappedOperands(CC);
522     std::swap(LHS, RHS);
523     break;
524   }
525 }
526 
527 // Return the RISC-V branch opcode that matches the given DAG integer
528 // condition code. The CondCode must be one of those supported by the RISC-V
529 // ISA (see normaliseSetCC).
530 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
531   switch (CC) {
532   default:
533     llvm_unreachable("Unsupported CondCode");
534   case ISD::SETEQ:
535     return RISCV::BEQ;
536   case ISD::SETNE:
537     return RISCV::BNE;
538   case ISD::SETLT:
539     return RISCV::BLT;
540   case ISD::SETGE:
541     return RISCV::BGE;
542   case ISD::SETULT:
543     return RISCV::BLTU;
544   case ISD::SETUGE:
545     return RISCV::BGEU;
546   }
547 }
548 
549 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
550                                             SelectionDAG &DAG) const {
551   switch (Op.getOpcode()) {
552   default:
553     report_fatal_error("unimplemented operand");
554   case ISD::GlobalAddress:
555     return lowerGlobalAddress(Op, DAG);
556   case ISD::BlockAddress:
557     return lowerBlockAddress(Op, DAG);
558   case ISD::ConstantPool:
559     return lowerConstantPool(Op, DAG);
560   case ISD::GlobalTLSAddress:
561     return lowerGlobalTLSAddress(Op, DAG);
562   case ISD::SELECT:
563     return lowerSELECT(Op, DAG);
564   case ISD::VASTART:
565     return lowerVASTART(Op, DAG);
566   case ISD::FRAMEADDR:
567     return lowerFRAMEADDR(Op, DAG);
568   case ISD::RETURNADDR:
569     return lowerRETURNADDR(Op, DAG);
570   case ISD::SHL_PARTS:
571     return lowerShiftLeftParts(Op, DAG);
572   case ISD::SRA_PARTS:
573     return lowerShiftRightParts(Op, DAG, true);
574   case ISD::SRL_PARTS:
575     return lowerShiftRightParts(Op, DAG, false);
576   case ISD::BITCAST: {
577     assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
578             Subtarget.hasStdExtZfh()) &&
579            "Unexpected custom legalisation");
580     SDLoc DL(Op);
581     SDValue Op0 = Op.getOperand(0);
582     if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
583       if (Op0.getValueType() != MVT::i16)
584         return SDValue();
585       SDValue NewOp0 =
586           DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
587       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
588       return FPConv;
589     } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
590                Subtarget.hasStdExtF()) {
591       if (Op0.getValueType() != MVT::i32)
592         return SDValue();
593       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
594       SDValue FPConv =
595           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
596       return FPConv;
597     }
598     return SDValue();
599   }
600   case ISD::INTRINSIC_WO_CHAIN:
601     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
602   case ISD::BSWAP:
603   case ISD::BITREVERSE: {
604     // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
605     assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
606     MVT VT = Op.getSimpleValueType();
607     SDLoc DL(Op);
608     // Start with the maximum immediate value which is the bitwidth - 1.
609     unsigned Imm = VT.getSizeInBits() - 1;
610     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
611     if (Op.getOpcode() == ISD::BSWAP)
612       Imm &= ~0x7U;
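    // For a 64-bit value this yields GREVI 63 for BITREVERSE and GREVI 56 for
    // BSWAP (31 and 24 respectively for a 32-bit value).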
613     return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
614                        DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
615   }
616   }
617 }
618 
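// Helpers returning the target-specific address node for each supported
// SDNode kind, tagged with the given RISCVII machine-operand flags.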
619 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
620                              SelectionDAG &DAG, unsigned Flags) {
621   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
622 }
623 
624 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
625                              SelectionDAG &DAG, unsigned Flags) {
626   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
627                                    Flags);
628 }
629 
630 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
631                              SelectionDAG &DAG, unsigned Flags) {
632   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
633                                    N->getOffset(), Flags);
634 }
635 
636 template <class NodeTy>
637 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
638                                      bool IsLocal) const {
639   SDLoc DL(N);
640   EVT Ty = getPointerTy(DAG.getDataLayout());
641 
642   if (isPositionIndependent()) {
643     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
644     if (IsLocal)
645       // Use PC-relative addressing to access the symbol. This generates the
646       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
647       // %pcrel_lo(auipc)).
648       return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
649 
650     // Use PC-relative addressing to access the GOT for this symbol, then load
651     // the address from the GOT. This generates the pattern (PseudoLA sym),
652     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
653     return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
654   }
655 
656   switch (getTargetMachine().getCodeModel()) {
657   default:
658     report_fatal_error("Unsupported code model for lowering");
659   case CodeModel::Small: {
660     // Generate a sequence for accessing addresses within the first 2 GiB of
661     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
662     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
663     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
664     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
665     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
666   }
667   case CodeModel::Medium: {
668     // Generate a sequence for accessing addresses within any 2 GiB range of
669     // the address space. This generates the pattern (PseudoLLA sym), which
670     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
671     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
672     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
673   }
674   }
675 }
676 
677 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
678                                                 SelectionDAG &DAG) const {
679   SDLoc DL(Op);
680   EVT Ty = Op.getValueType();
681   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
682   int64_t Offset = N->getOffset();
683   MVT XLenVT = Subtarget.getXLenVT();
684 
685   const GlobalValue *GV = N->getGlobal();
686   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
687   SDValue Addr = getAddr(N, DAG, IsLocal);
688 
689   // In order to maximise the opportunity for common subexpression elimination,
690   // emit a separate ADD node for the global address offset instead of folding
691   // it in the global address node. Later peephole optimisations may choose to
692   // fold it back in when profitable.
693   if (Offset != 0)
694     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
695                        DAG.getConstant(Offset, DL, XLenVT));
696   return Addr;
697 }
698 
699 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
700                                                SelectionDAG &DAG) const {
701   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
702 
703   return getAddr(N, DAG);
704 }
705 
706 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
707                                                SelectionDAG &DAG) const {
708   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
709 
710   return getAddr(N, DAG);
711 }
712 
713 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
714                                               SelectionDAG &DAG,
715                                               bool UseGOT) const {
716   SDLoc DL(N);
717   EVT Ty = getPointerTy(DAG.getDataLayout());
718   const GlobalValue *GV = N->getGlobal();
719   MVT XLenVT = Subtarget.getXLenVT();
720 
721   if (UseGOT) {
722     // Use PC-relative addressing to access the GOT for this TLS symbol, then
723     // load the address from the GOT and add the thread pointer. This generates
724     // the pattern (PseudoLA_TLS_IE sym), which expands to
725     // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
726     SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
727     SDValue Load =
728         SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
729 
730     // Add the thread pointer.
731     SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
732     return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
733   }
734 
735   // Generate a sequence for accessing the address relative to the thread
736   // pointer, with the appropriate adjustment for the thread pointer offset.
737   // This generates the pattern
738   // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
739   SDValue AddrHi =
740       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
741   SDValue AddrAdd =
742       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
743   SDValue AddrLo =
744       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
745 
746   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
747   SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
748   SDValue MNAdd = SDValue(
749       DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
750       0);
751   return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
752 }
753 
754 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
755                                                SelectionDAG &DAG) const {
756   SDLoc DL(N);
757   EVT Ty = getPointerTy(DAG.getDataLayout());
758   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
759   const GlobalValue *GV = N->getGlobal();
760 
761   // Use a PC-relative addressing mode to access the global dynamic GOT address.
762   // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
763   // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
764   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
765   SDValue Load =
766       SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
767 
768   // Prepare argument list to generate call.
769   ArgListTy Args;
770   ArgListEntry Entry;
771   Entry.Node = Load;
772   Entry.Ty = CallTy;
773   Args.push_back(Entry);
774 
775   // Setup call to __tls_get_addr.
776   TargetLowering::CallLoweringInfo CLI(DAG);
777   CLI.setDebugLoc(DL)
778       .setChain(DAG.getEntryNode())
779       .setLibCallee(CallingConv::C, CallTy,
780                     DAG.getExternalSymbol("__tls_get_addr", Ty),
781                     std::move(Args));
782 
783   return LowerCallTo(CLI).first;
784 }
785 
786 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
787                                                    SelectionDAG &DAG) const {
788   SDLoc DL(Op);
789   EVT Ty = Op.getValueType();
790   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
791   int64_t Offset = N->getOffset();
792   MVT XLenVT = Subtarget.getXLenVT();
793 
794   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
795 
796   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
797       CallingConv::GHC)
798     report_fatal_error("In GHC calling convention TLS is not supported");
799 
800   SDValue Addr;
801   switch (Model) {
802   case TLSModel::LocalExec:
803     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
804     break;
805   case TLSModel::InitialExec:
806     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
807     break;
808   case TLSModel::LocalDynamic:
809   case TLSModel::GeneralDynamic:
810     Addr = getDynamicTLSAddr(N, DAG);
811     break;
812   }
813 
814   // In order to maximise the opportunity for common subexpression elimination,
815   // emit a separate ADD node for the global address offset instead of folding
816   // it in the global address node. Later peephole optimisations may choose to
817   // fold it back in when profitable.
818   if (Offset != 0)
819     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
820                        DAG.getConstant(Offset, DL, XLenVT));
821   return Addr;
822 }
823 
824 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
825   SDValue CondV = Op.getOperand(0);
826   SDValue TrueV = Op.getOperand(1);
827   SDValue FalseV = Op.getOperand(2);
828   SDLoc DL(Op);
829   MVT XLenVT = Subtarget.getXLenVT();
830 
831   // If the result type is XLenVT and CondV is the output of a SETCC node
832   // which also operated on XLenVT inputs, then merge the SETCC node into the
833   // lowered RISCVISD::SELECT_CC to take advantage of the integer
834   // compare+branch instructions. i.e.:
835   // (select (setcc lhs, rhs, cc), truev, falsev)
836   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
837   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
838       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
839     SDValue LHS = CondV.getOperand(0);
840     SDValue RHS = CondV.getOperand(1);
841     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
842     ISD::CondCode CCVal = CC->get();
843 
844     normaliseSetCC(LHS, RHS, CCVal);
845 
846     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
847     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
848     return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
849   }
850 
851   // Otherwise:
852   // (select condv, truev, falsev)
853   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
854   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
855   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
856 
857   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
858 
859   return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
860 }
861 
862 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
863   MachineFunction &MF = DAG.getMachineFunction();
864   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
865 
866   SDLoc DL(Op);
867   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
868                                  getPointerTy(MF.getDataLayout()));
869 
870   // vastart just stores the address of the VarArgsFrameIndex slot into the
871   // memory location argument.
872   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
873   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
874                       MachinePointerInfo(SV));
875 }
876 
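// Non-zero depths are satisfied by walking the chain of saved frame pointers;
// the previous frame pointer is stored at offset -2*XLEN/8 from the current
// frame pointer.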
877 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
878                                             SelectionDAG &DAG) const {
879   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
880   MachineFunction &MF = DAG.getMachineFunction();
881   MachineFrameInfo &MFI = MF.getFrameInfo();
882   MFI.setFrameAddressIsTaken(true);
883   Register FrameReg = RI.getFrameRegister(MF);
884   int XLenInBytes = Subtarget.getXLen() / 8;
885 
886   EVT VT = Op.getValueType();
887   SDLoc DL(Op);
888   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
889   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
890   while (Depth--) {
891     int Offset = -(XLenInBytes * 2);
892     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
893                               DAG.getIntPtrConstant(Offset, DL));
894     FrameAddr =
895         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
896   }
897   return FrameAddr;
898 }
899 
900 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
901                                              SelectionDAG &DAG) const {
902   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
903   MachineFunction &MF = DAG.getMachineFunction();
904   MachineFrameInfo &MFI = MF.getFrameInfo();
905   MFI.setReturnAddressIsTaken(true);
906   MVT XLenVT = Subtarget.getXLenVT();
907   int XLenInBytes = Subtarget.getXLen() / 8;
908 
909   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
910     return SDValue();
911 
912   EVT VT = Op.getValueType();
913   SDLoc DL(Op);
914   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
915   if (Depth) {
916     int Off = -XLenInBytes;
917     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
918     SDValue Offset = DAG.getConstant(Off, DL, VT);
919     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
920                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
921                        MachinePointerInfo());
922   }
923 
924   // Return the value of the return address register, marking it an implicit
925   // live-in.
926   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
927   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
928 }
929 
930 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
931                                                  SelectionDAG &DAG) const {
932   SDLoc DL(Op);
933   SDValue Lo = Op.getOperand(0);
934   SDValue Hi = Op.getOperand(1);
935   SDValue Shamt = Op.getOperand(2);
936   EVT VT = Lo.getValueType();
937 
938   // if Shamt-XLEN < 0: // Shamt < XLEN
939   //   Lo = Lo << Shamt
940   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
941   // else:
942   //   Lo = 0
943   //   Hi = Lo << (Shamt-XLEN)
944 
945   SDValue Zero = DAG.getConstant(0, DL, VT);
946   SDValue One = DAG.getConstant(1, DL, VT);
947   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
948   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
949   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
950   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
951 
952   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
953   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
954   SDValue ShiftRightLo =
955       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
956   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
957   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
958   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
959 
960   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
961 
962   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
963   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
964 
965   SDValue Parts[2] = {Lo, Hi};
966   return DAG.getMergeValues(Parts, DL);
967 }
968 
969 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
970                                                   bool IsSRA) const {
971   SDLoc DL(Op);
972   SDValue Lo = Op.getOperand(0);
973   SDValue Hi = Op.getOperand(1);
974   SDValue Shamt = Op.getOperand(2);
975   EVT VT = Lo.getValueType();
976 
977   // SRA expansion:
978   //   if Shamt-XLEN < 0: // Shamt < XLEN
979   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
980   //     Hi = Hi >>s Shamt
981   //   else:
982   //     Lo = Hi >>s (Shamt-XLEN);
983   //     Hi = Hi >>s (XLEN-1)
984   //
985   // SRL expansion:
986   //   if Shamt-XLEN < 0: // Shamt < XLEN
987   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
988   //     Hi = Hi >>u Shamt
989   //   else:
990   //     Lo = Hi >>u (Shamt-XLEN);
991   //     Hi = 0;
992 
993   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
994 
995   SDValue Zero = DAG.getConstant(0, DL, VT);
996   SDValue One = DAG.getConstant(1, DL, VT);
997   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
998   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
999   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
1000   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
1001 
1002   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
1003   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
1004   SDValue ShiftLeftHi =
1005       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
1006   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
1007   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
1008   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
1009   SDValue HiFalse =
1010       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
1011 
1012   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
1013 
1014   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
1015   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1016 
1017   SDValue Parts[2] = {Lo, Hi};
1018   return DAG.getMergeValues(Parts, DL);
1019 }
1020 
1021 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1022                                                      SelectionDAG &DAG) const {
1023   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1024   SDLoc DL(Op);
1025 
1026   if (Subtarget.hasStdExtV()) {
1027     // Some RVV intrinsics may claim that they want an integer operand to be
1028     // extended.
1029     if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1030             RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1031       if (II->ExtendedOperand) {
1032         assert(II->ExtendedOperand < Op.getNumOperands());
1033         SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1034         SDValue &ScalarOp = Operands[II->ExtendedOperand];
1035         if (ScalarOp.getValueType() == MVT::i8 ||
1036             ScalarOp.getValueType() == MVT::i16 ||
1037             ScalarOp.getValueType() == MVT::i32) {
1038           ScalarOp =
1039               DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), ScalarOp);
1040           return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
1041                              Operands);
1042         }
1043       }
1044     }
1045   }
1046 
1047   switch (IntNo) {
1048   default:
1049     return SDValue();    // Don't custom lower most intrinsics.
1050   case Intrinsic::thread_pointer: {
1051     EVT PtrVT = getPointerTy(DAG.getDataLayout());
1052     return DAG.getRegister(RISCV::X4, PtrVT);
1053   }
1054   }
1055 }
1056 
1057 // Returns the opcode of the target-specific SDNode that implements the 32-bit
1058 // form of the given Opcode.
1059 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
1060   switch (Opcode) {
1061   default:
1062     llvm_unreachable("Unexpected opcode");
1063   case ISD::SHL:
1064     return RISCVISD::SLLW;
1065   case ISD::SRA:
1066     return RISCVISD::SRAW;
1067   case ISD::SRL:
1068     return RISCVISD::SRLW;
1069   case ISD::SDIV:
1070     return RISCVISD::DIVW;
1071   case ISD::UDIV:
1072     return RISCVISD::DIVUW;
1073   case ISD::UREM:
1074     return RISCVISD::REMUW;
1075   case ISD::ROTL:
1076     return RISCVISD::ROLW;
1077   case ISD::ROTR:
1078     return RISCVISD::RORW;
1079   case RISCVISD::GREVI:
1080     return RISCVISD::GREVIW;
1081   case RISCVISD::GORCI:
1082     return RISCVISD::GORCIW;
1083   }
1084 }
1085 
1086 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
1087 // Because i32 isn't a legal type for RV64, these operations would otherwise
1088 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
1089 // later on, because the fact that the operation was originally of type i32
1090 // is lost.
1091 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
1092   SDLoc DL(N);
1093   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
1094   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1095   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
1096   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1097   // ReplaceNodeResults requires we maintain the same type for the return value.
1098   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
1099 }
1100 
1101 // Converts the given 32-bit operation to an i64 operation with sign-extension
1102 // semantics, reducing the number of sign-extension instructions required.
1103 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
1104   SDLoc DL(N);
1105   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1106   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
1107   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
1108   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
1109                                DAG.getValueType(MVT::i32));
1110   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
1111 }
1112 
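// Replace results of illegally-typed operations (mostly i32 on RV64, plus
// i64 READCYCLECOUNTER on RV32) with equivalent nodes or libcalls that
// preserve the semantics of the original operation.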
1113 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
1114                                              SmallVectorImpl<SDValue> &Results,
1115                                              SelectionDAG &DAG) const {
1116   SDLoc DL(N);
1117   switch (N->getOpcode()) {
1118   default:
1119     llvm_unreachable("Don't know how to custom type legalize this operation!");
1120   case ISD::STRICT_FP_TO_SINT:
1121   case ISD::STRICT_FP_TO_UINT:
1122   case ISD::FP_TO_SINT:
1123   case ISD::FP_TO_UINT: {
1124     bool IsStrict = N->isStrictFPOpcode();
1125     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1126            "Unexpected custom legalisation");
1127     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
1128     // If the FP type needs to be softened, emit a library call using the 'si'
1129     // version. If we left it to default legalization we'd end up with 'di'. If
1130     // the FP type doesn't need to be softened just let generic type
1131     // legalization promote the result type.
1132     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
1133         TargetLowering::TypeSoftenFloat)
1134       return;
1135     RTLIB::Libcall LC;
1136     if (N->getOpcode() == ISD::FP_TO_SINT ||
1137         N->getOpcode() == ISD::STRICT_FP_TO_SINT)
1138       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
1139     else
1140       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
1141     MakeLibCallOptions CallOptions;
1142     EVT OpVT = Op0.getValueType();
1143     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
1144     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
1145     SDValue Result;
1146     std::tie(Result, Chain) =
1147         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
1148     Results.push_back(Result);
1149     if (IsStrict)
1150       Results.push_back(Chain);
1151     break;
1152   }
1153   case ISD::READCYCLECOUNTER: {
1154     assert(!Subtarget.is64Bit() &&
1155            "READCYCLECOUNTER only has custom type legalization on riscv32");
1156 
1157     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
1158     SDValue RCW =
1159         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
1160 
1161     Results.push_back(
1162         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
1163     Results.push_back(RCW.getValue(2));
1164     break;
1165   }
1166   case ISD::ADD:
1167   case ISD::SUB:
1168   case ISD::MUL:
1169     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1170            "Unexpected custom legalisation");
1171     if (N->getOperand(1).getOpcode() == ISD::Constant)
1172       return;
1173     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
1174     break;
1175   case ISD::SHL:
1176   case ISD::SRA:
1177   case ISD::SRL:
1178     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1179            "Unexpected custom legalisation");
1180     if (N->getOperand(1).getOpcode() == ISD::Constant)
1181       return;
1182     Results.push_back(customLegalizeToWOp(N, DAG));
1183     break;
1184   case ISD::ROTL:
1185   case ISD::ROTR:
1186     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1187            "Unexpected custom legalisation");
1188     Results.push_back(customLegalizeToWOp(N, DAG));
1189     break;
1190   case ISD::SDIV:
1191   case ISD::UDIV:
1192   case ISD::UREM:
1193     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1194            Subtarget.hasStdExtM() && "Unexpected custom legalisation");
1195     if (N->getOperand(0).getOpcode() == ISD::Constant ||
1196         N->getOperand(1).getOpcode() == ISD::Constant)
1197       return;
1198     Results.push_back(customLegalizeToWOp(N, DAG));
1199     break;
1200   case ISD::BITCAST: {
1201     assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1202              Subtarget.hasStdExtF()) ||
1203             (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) &&
1204            "Unexpected custom legalisation");
1205     SDValue Op0 = N->getOperand(0);
1206     if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) {
1207       if (Op0.getValueType() != MVT::f16)
1208         return;
1209       SDValue FPConv =
1210           DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0);
1211       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
1212     } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1213                Subtarget.hasStdExtF()) {
1214       if (Op0.getValueType() != MVT::f32)
1215         return;
1216       SDValue FPConv =
1217           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
1218       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
1219     }
1220     break;
1221   }
1222   case RISCVISD::GREVI:
1223   case RISCVISD::GORCI: {
1224     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1225            "Unexpected custom legalisation");
1226     // This is similar to customLegalizeToWOp, except that we pass the second
1227     // operand (a TargetConstant) straight through: it is already of type
1228     // XLenVT.
1229     SDLoc DL(N);
1230     RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
1231     SDValue NewOp0 =
1232         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1233     SDValue NewRes =
1234         DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
1235     // ReplaceNodeResults requires we maintain the same type for the return
1236     // value.
1237     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
1238     break;
1239   }
1240   case ISD::BSWAP:
1241   case ISD::BITREVERSE: {
1242     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1243            Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
1244     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
1245                                  N->getOperand(0));
1246     unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
1247     SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
1248                                  DAG.getTargetConstant(Imm, DL,
1249                                                        Subtarget.getXLenVT()));
1250     // ReplaceNodeResults requires we maintain the same type for the return
1251     // value.
1252     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
1253     break;
1254   }
1255   case ISD::FSHL:
1256   case ISD::FSHR: {
1257     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1258            Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
1259     SDValue NewOp0 =
1260         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1261     SDValue NewOp1 =
1262         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
1263     SDValue NewOp2 =
1264         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
1265     // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
1266     // Mask the shift amount to 5 bits.
1267     NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
1268                          DAG.getConstant(0x1f, DL, MVT::i64));
1269     unsigned Opc =
1270         N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
1271     SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
1272     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
1273     break;
1274   }
1275   }
1276 }
1277 
1278 // A structure to hold one of the bit-manipulation patterns below. Together, a
1279 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
1280 //   (or (and (shl x, 1), 0xAAAAAAAA),
1281 //       (and (srl x, 1), 0x55555555))
1282 struct RISCVBitmanipPat {
1283   SDValue Op;
1284   unsigned ShAmt;
1285   bool IsSHL;
1286 
1287   bool formsPairWith(const RISCVBitmanipPat &Other) const {
1288     return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
1289   }
1290 };
1291 
1292 // Matches any of the following bit-manipulation patterns:
1293 //   (and (shl x, 1), (0x55555555 << 1))
1294 //   (and (srl x, 1), 0x55555555)
1295 //   (shl (and x, 0x55555555), 1)
1296 //   (srl (and x, (0x55555555 << 1)), 1)
1297 // where the shift amount and mask may vary thus:
1298 //   [1]  = 0x55555555 / 0xAAAAAAAA
1299 //   [2]  = 0x33333333 / 0xCCCCCCCC
1300 //   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
1301 //   [8]  = 0x00FF00FF / 0xFF00FF00
1302 //   [16] = 0x0000FFFF / 0xFFFF0000
1303 //   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
1304 static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
1305   Optional<uint64_t> Mask;
1306   // Optionally consume a mask around the shift operation.
1307   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
1308     Mask = Op.getConstantOperandVal(1);
1309     Op = Op.getOperand(0);
1310   }
1311   if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
1312     return None;
1313   bool IsSHL = Op.getOpcode() == ISD::SHL;
1314 
1315   if (!isa<ConstantSDNode>(Op.getOperand(1)))
1316     return None;
1317   auto ShAmt = Op.getConstantOperandVal(1);
1318 
1319   if (!isPowerOf2_64(ShAmt))
1320     return None;
1321 
1322   // These are the unshifted masks which we use to match bit-manipulation
1323   // patterns. They may be shifted left in certain circumstances.
1324   static const uint64_t BitmanipMasks[] = {
1325       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
1326       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
1327   };
1328 
1329   unsigned MaskIdx = Log2_64(ShAmt);
1330   if (MaskIdx >= array_lengthof(BitmanipMasks))
1331     return None;
1332 
1333   auto Src = Op.getOperand(0);
1334 
1335   unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
1336   auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
1337 
1338   // The expected mask is shifted left when the AND is found around SHL
1339   // patterns.
1340   //   ((x >> 1) & 0x55555555)
1341   //   ((x << 1) & 0xAAAAAAAA)
1342   bool SHLExpMask = IsSHL;
1343 
1344   if (!Mask) {
1345     // Sometimes LLVM keeps the mask as an operand of the shift, typically when
1346     // the mask is all ones: consume that now.
1347     if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
1348       Mask = Src.getConstantOperandVal(1);
1349       Src = Src.getOperand(0);
1350       // The expected mask is now in fact shifted left for SRL, so reverse the
1351       // decision.
1352       //   ((x & 0xAAAAAAAA) >> 1)
1353       //   ((x & 0x55555555) << 1)
1354       SHLExpMask = !SHLExpMask;
1355     } else {
1356       // Use a default shifted mask of all-ones if there's no AND, truncated
1357       // down to the expected width. This simplifies the logic later on.
1358       Mask = maskTrailingOnes<uint64_t>(Width);
1359       *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
1360     }
1361   }
1362 
1363   if (SHLExpMask)
1364     ExpMask <<= ShAmt;
1365 
1366   if (Mask != ExpMask)
1367     return None;
1368 
1369   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
1370 }
1371 
1372 // Match the following pattern as a GREVI(W) operation
1373 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
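// For example, on RV32
//   (or (and (shl x, 4), 0xF0F0F0F0), (and (srl x, 4), 0x0F0F0F0F))
// swaps the two nibbles within each byte of x and is combined to (GREVI x, 4).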
1374 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
1375                                const RISCVSubtarget &Subtarget) {
1376   EVT VT = Op.getValueType();
1377 
1378   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
1379     auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
1380     auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
1381     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
1382       SDLoc DL(Op);
1383       return DAG.getNode(
1384           RISCVISD::GREVI, DL, VT, LHS->Op,
1385           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
1386     }
1387   }
1388   return SDValue();
1389 }
1390 
// Matches any of the following patterns as a GORCI(W) operation
// 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
// 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
// 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
// 4.  (or (rotl/rotr x, bitwidth/2), x)
// Note that with the variant of 3.,
//     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
// the inner pattern will first be matched as GREVI and then the outer pattern
// will be matched to GORCI via the first rule above.
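// As an illustrative example of rule 4., for an i32 value
//   (or (rotl x, 16), x)
// is combined to (GORCI x, 16), since the rotate amount is half the bit width.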
1400 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
1401                                const RISCVSubtarget &Subtarget) {
1402   EVT VT = Op.getValueType();
1403 
1404   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
1405     SDLoc DL(Op);
1406     SDValue Op0 = Op.getOperand(0);
1407     SDValue Op1 = Op.getOperand(1);
1408 
1409     auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
1410       if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
1411           isPowerOf2_32(Reverse.getConstantOperandVal(1)))
1412         return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
1413       // We can also form GORCI from ROTL/ROTR by half the bitwidth.
1414       if ((Reverse.getOpcode() == ISD::ROTL ||
1415            Reverse.getOpcode() == ISD::ROTR) &&
1416           Reverse.getOperand(0) == X &&
1417           isa<ConstantSDNode>(Reverse.getOperand(1))) {
1418         uint64_t RotAmt = Reverse.getConstantOperandVal(1);
1419         if (RotAmt == (VT.getSizeInBits() / 2))
1420           return DAG.getNode(
1421               RISCVISD::GORCI, DL, VT, X,
1422               DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
1423       }
1424       return SDValue();
1425     };
1426 
1427     // Check for either commutable permutation of (or (GREVI x, shamt), x)
1428     if (SDValue V = MatchOROfReverse(Op0, Op1))
1429       return V;
1430     if (SDValue V = MatchOROfReverse(Op1, Op0))
1431       return V;
1432 
1433     // OR is commutable so canonicalize its OR operand to the left
1434     if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
1435       std::swap(Op0, Op1);
1436     if (Op0.getOpcode() != ISD::OR)
1437       return SDValue();
1438     SDValue OrOp0 = Op0.getOperand(0);
1439     SDValue OrOp1 = Op0.getOperand(1);
1440     auto LHS = matchRISCVBitmanipPat(OrOp0);
1441     // OR is commutable so swap the operands and try again: x might have been
1442     // on the left
1443     if (!LHS) {
1444       std::swap(OrOp0, OrOp1);
1445       LHS = matchRISCVBitmanipPat(OrOp0);
1446     }
1447     auto RHS = matchRISCVBitmanipPat(Op1);
1448     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
1449       return DAG.getNode(
1450           RISCVISD::GORCI, DL, VT, LHS->Op,
1451           DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
1452     }
1453   }
1454   return SDValue();
1455 }
1456 
// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
// non-zero, and to x when it is zero. Any repeated GREVI stage undoes itself.
// Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). A repeated GORCI
// stage does not undo itself, but it is redundant.
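// For example, (GREVI (GREVI x, 1), 2) becomes (GREVI x, 3), while
// (GREVI (GREVI x, 3), 3) simplifies back to x; (GORCI (GORCI x, 1), 2)
// becomes (GORCI x, 3).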
1461 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
1462   unsigned ShAmt1 = N->getConstantOperandVal(1);
1463   SDValue Src = N->getOperand(0);
1464 
1465   if (Src.getOpcode() != N->getOpcode())
1466     return SDValue();
1467 
1468   unsigned ShAmt2 = Src.getConstantOperandVal(1);
1469   Src = Src.getOperand(0);
1470 
1471   unsigned CombinedShAmt;
1472   if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW)
1473     CombinedShAmt = ShAmt1 | ShAmt2;
1474   else
1475     CombinedShAmt = ShAmt1 ^ ShAmt2;
1476 
1477   if (CombinedShAmt == 0)
1478     return Src;
1479 
1480   SDLoc DL(N);
1481   return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src,
1482                      DAG.getTargetConstant(CombinedShAmt, DL,
1483                                            N->getOperand(1).getValueType()));
1484 }
1485 
1486 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
1487                                                DAGCombinerInfo &DCI) const {
1488   SelectionDAG &DAG = DCI.DAG;
1489 
1490   switch (N->getOpcode()) {
1491   default:
1492     break;
1493   case RISCVISD::SplitF64: {
1494     SDValue Op0 = N->getOperand(0);
1495     // If the input to SplitF64 is just BuildPairF64 then the operation is
1496     // redundant. Instead, use BuildPairF64's operands directly.
1497     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
1498       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
1499 
1500     SDLoc DL(N);
1501 
1502     // It's cheaper to materialise two 32-bit integers than to load a double
1503     // from the constant pool and transfer it to integer registers through the
1504     // stack.
1505     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
1506       APInt V = C->getValueAPF().bitcastToAPInt();
1507       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
1508       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
1509       return DCI.CombineTo(N, Lo, Hi);
1510     }
1511 
1512     // This is a target-specific version of a DAGCombine performed in
1513     // DAGCombiner::visitBITCAST. It performs the equivalent of:
1514     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
1515     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
1516     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
1517         !Op0.getNode()->hasOneUse())
1518       break;
1519     SDValue NewSplitF64 =
1520         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
1521                     Op0.getOperand(0));
1522     SDValue Lo = NewSplitF64.getValue(0);
1523     SDValue Hi = NewSplitF64.getValue(1);
1524     APInt SignBit = APInt::getSignMask(32);
1525     if (Op0.getOpcode() == ISD::FNEG) {
1526       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
1527                                   DAG.getConstant(SignBit, DL, MVT::i32));
1528       return DCI.CombineTo(N, Lo, NewHi);
1529     }
1530     assert(Op0.getOpcode() == ISD::FABS);
1531     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
1532                                 DAG.getConstant(~SignBit, DL, MVT::i32));
1533     return DCI.CombineTo(N, Lo, NewHi);
1534   }
1535   case RISCVISD::SLLW:
1536   case RISCVISD::SRAW:
1537   case RISCVISD::SRLW:
1538   case RISCVISD::ROLW:
1539   case RISCVISD::RORW: {
1540     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
1541     SDValue LHS = N->getOperand(0);
1542     SDValue RHS = N->getOperand(1);
1543     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
1544     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if (SimplifyDemandedBits(LHS, LHSMask, DCI) ||
        SimplifyDemandedBits(RHS, RHSMask, DCI)) {
1547       if (N->getOpcode() != ISD::DELETED_NODE)
1548         DCI.AddToWorklist(N);
1549       return SDValue(N, 0);
1550     }
1551     break;
1552   }
1553   case RISCVISD::FSLW:
1554   case RISCVISD::FSRW: {
    // Only the lower 32 bits of the operand values and the lower 6 bits of the
    // shift amount are read.
1557     SDValue Op0 = N->getOperand(0);
1558     SDValue Op1 = N->getOperand(1);
1559     SDValue ShAmt = N->getOperand(2);
1560     APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
1561     APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
1562     if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
1563         SimplifyDemandedBits(Op1, OpMask, DCI) ||
1564         SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
1565       if (N->getOpcode() != ISD::DELETED_NODE)
1566         DCI.AddToWorklist(N);
1567       return SDValue(N, 0);
1568     }
1569     break;
1570   }
1571   case RISCVISD::GREVIW:
1572   case RISCVISD::GORCIW: {
1573     // Only the lower 32 bits of the first operand are read
1574     SDValue Op0 = N->getOperand(0);
1575     APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
1576     if (SimplifyDemandedBits(Op0, Mask, DCI)) {
1577       if (N->getOpcode() != ISD::DELETED_NODE)
1578         DCI.AddToWorklist(N);
1579       return SDValue(N, 0);
1580     }
1581 
1582     return combineGREVI_GORCI(N, DCI.DAG);
1583   }
1584   case RISCVISD::FMV_X_ANYEXTW_RV64: {
1585     SDLoc DL(N);
1586     SDValue Op0 = N->getOperand(0);
1587     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
1588     // conversion is unnecessary and can be replaced with an ANY_EXTEND
1589     // of the FMV_W_X_RV64 operand.
1590     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
1591       assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
1592              "Unexpected value type!");
1593       return Op0.getOperand(0);
1594     }
1595 
1596     // This is a target-specific version of a DAGCombine performed in
1597     // DAGCombiner::visitBITCAST. It performs the equivalent of:
1598     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
1599     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
1600     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
1601         !Op0.getNode()->hasOneUse())
1602       break;
1603     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
1604                                  Op0.getOperand(0));
1605     APInt SignBit = APInt::getSignMask(32).sext(64);
1606     if (Op0.getOpcode() == ISD::FNEG)
1607       return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
1608                          DAG.getConstant(SignBit, DL, MVT::i64));
1609 
1610     assert(Op0.getOpcode() == ISD::FABS);
1611     return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
1612                        DAG.getConstant(~SignBit, DL, MVT::i64));
1613   }
1614   case RISCVISD::GREVI:
1615   case RISCVISD::GORCI:
1616     return combineGREVI_GORCI(N, DCI.DAG);
1617   case ISD::OR:
1618     if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
1619       return GREV;
1620     if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
1621       return GORC;
1622     break;
1623   }
1624 
1625   return SDValue();
1626 }
1627 
1628 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
1629     const SDNode *N, CombineLevel Level) const {
1630   // The following folds are only desirable if `(OP _, c1 << c2)` can be
1631   // materialised in fewer instructions than `(OP _, c1)`:
1632   //
1633   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1634   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
1635   SDValue N0 = N->getOperand(0);
1636   EVT Ty = N0.getValueType();
1637   if (Ty.isScalarInteger() &&
1638       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
1639     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
1640     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
1641     if (C1 && C2) {
1642       APInt C1Int = C1->getAPIntValue();
1643       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
1644 
1645       // We can materialise `c1 << c2` into an add immediate, so it's "free",
1646       // and the combine should happen, to potentially allow further combines
1647       // later.
1648       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
1649           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
1650         return true;
1651 
1652       // We can materialise `c1` in an add immediate, so it's "free", and the
1653       // combine should be prevented.
1654       if (C1Int.getMinSignedBits() <= 64 &&
1655           isLegalAddImmediate(C1Int.getSExtValue()))
1656         return false;
1657 
1658       // Neither constant will fit into an immediate, so find materialisation
1659       // costs.
1660       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
1661                                               Subtarget.is64Bit());
1662       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
1663           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
1664 
1665       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
1666       // combine should be prevented.
1667       if (C1Cost < ShiftedC1Cost)
1668         return false;
1669     }
1670   }
1671   return true;
1672 }
1673 
1674 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
1675     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1676     unsigned Depth) const {
1677   switch (Op.getOpcode()) {
1678   default:
1679     break;
1680   case RISCVISD::SLLW:
1681   case RISCVISD::SRAW:
1682   case RISCVISD::SRLW:
1683   case RISCVISD::DIVW:
1684   case RISCVISD::DIVUW:
1685   case RISCVISD::REMUW:
1686   case RISCVISD::ROLW:
1687   case RISCVISD::RORW:
1688   case RISCVISD::GREVIW:
1689   case RISCVISD::GORCIW:
1690   case RISCVISD::FSLW:
1691   case RISCVISD::FSRW:
1692     // TODO: As the result is sign-extended, this is conservatively correct. A
1693     // more precise answer could be calculated for SRAW depending on known
1694     // bits in the shift amount.
1695     return 33;
1696   }
1697 
1698   return 1;
1699 }
1700 
1701 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
1702                                                   MachineBasicBlock *BB) {
1703   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
1704 
1705   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
1706   // Should the count have wrapped while it was being read, we need to try
1707   // again.
1708   // ...
1709   // read:
1710   // rdcycleh x3 # load high word of cycle
1711   // rdcycle  x2 # load low word of cycle
1712   // rdcycleh x4 # load high word of cycle
1713   // bne x3, x4, read # check if high word reads match, otherwise try again
1714   // ...
1715 
1716   MachineFunction &MF = *BB->getParent();
1717   const BasicBlock *LLVM_BB = BB->getBasicBlock();
1718   MachineFunction::iterator It = ++BB->getIterator();
1719 
1720   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
1721   MF.insert(It, LoopMBB);
1722 
1723   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
1724   MF.insert(It, DoneMBB);
1725 
1726   // Transfer the remainder of BB and its successor edges to DoneMBB.
1727   DoneMBB->splice(DoneMBB->begin(), BB,
1728                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
1729   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
1730 
1731   BB->addSuccessor(LoopMBB);
1732 
1733   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1734   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1735   Register LoReg = MI.getOperand(0).getReg();
1736   Register HiReg = MI.getOperand(1).getReg();
1737   DebugLoc DL = MI.getDebugLoc();
1738 
1739   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
1740   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
1741       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
1742       .addReg(RISCV::X0);
1743   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
1744       .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
1745       .addReg(RISCV::X0);
1746   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
1747       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
1748       .addReg(RISCV::X0);
1749 
1750   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
1751       .addReg(HiReg)
1752       .addReg(ReadAgainReg)
1753       .addMBB(LoopMBB);
1754 
1755   LoopMBB->addSuccessor(LoopMBB);
1756   LoopMBB->addSuccessor(DoneMBB);
1757 
1758   MI.eraseFromParent();
1759 
1760   return DoneMBB;
1761 }
1762 
1763 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
1764                                              MachineBasicBlock *BB) {
1765   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
1766 
1767   MachineFunction &MF = *BB->getParent();
1768   DebugLoc DL = MI.getDebugLoc();
1769   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1770   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
1771   Register LoReg = MI.getOperand(0).getReg();
1772   Register HiReg = MI.getOperand(1).getReg();
1773   Register SrcReg = MI.getOperand(2).getReg();
1774   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
1775   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
1776 
1777   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
1778                           RI);
1779   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
1780   MachineMemOperand *MMOLo =
1781       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
1782   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
1783       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
1784   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
1785       .addFrameIndex(FI)
1786       .addImm(0)
1787       .addMemOperand(MMOLo);
1788   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
1789       .addFrameIndex(FI)
1790       .addImm(4)
1791       .addMemOperand(MMOHi);
1792   MI.eraseFromParent(); // The pseudo instruction is gone now.
1793   return BB;
1794 }
1795 
1796 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
1797                                                  MachineBasicBlock *BB) {
1798   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
1799          "Unexpected instruction");
1800 
1801   MachineFunction &MF = *BB->getParent();
1802   DebugLoc DL = MI.getDebugLoc();
1803   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1804   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
1805   Register DstReg = MI.getOperand(0).getReg();
1806   Register LoReg = MI.getOperand(1).getReg();
1807   Register HiReg = MI.getOperand(2).getReg();
1808   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
1809   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
1810 
1811   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
1812   MachineMemOperand *MMOLo =
1813       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
1814   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
1815       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
1816   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
1817       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
1818       .addFrameIndex(FI)
1819       .addImm(0)
1820       .addMemOperand(MMOLo);
1821   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
1822       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
1823       .addFrameIndex(FI)
1824       .addImm(4)
1825       .addMemOperand(MMOHi);
1826   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
1827   MI.eraseFromParent(); // The pseudo instruction is gone now.
1828   return BB;
1829 }
1830 
1831 static bool isSelectPseudo(MachineInstr &MI) {
1832   switch (MI.getOpcode()) {
1833   default:
1834     return false;
1835   case RISCV::Select_GPR_Using_CC_GPR:
1836   case RISCV::Select_FPR16_Using_CC_GPR:
1837   case RISCV::Select_FPR32_Using_CC_GPR:
1838   case RISCV::Select_FPR64_Using_CC_GPR:
1839     return true;
1840   }
1841 }
1842 
1843 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
1844                                            MachineBasicBlock *BB) {
1845   // To "insert" Select_* instructions, we actually have to insert the triangle
1846   // control-flow pattern.  The incoming instructions know the destination vreg
1847   // to set, the condition code register to branch on, the true/false values to
1848   // select between, and the condcode to use to select the appropriate branch.
1849   //
1850   // We produce the following control flow:
1851   //     HeadMBB
1852   //     |  \
1853   //     |  IfFalseMBB
1854   //     | /
1855   //    TailMBB
1856   //
1857   // When we find a sequence of selects we attempt to optimize their emission
1858   // by sharing the control flow. Currently we only handle cases where we have
1859   // multiple selects with the exact same condition (same LHS, RHS and CC).
1860   // The selects may be interleaved with other instructions if the other
1861   // instructions meet some requirements we deem safe:
1862   // - They are debug instructions. Otherwise,
1863   // - They do not have side-effects, do not access memory and their inputs do
1864   //   not depend on the results of the select pseudo-instructions.
1865   // The TrueV/FalseV operands of the selects cannot depend on the result of
1866   // previous selects in the sequence.
1867   // These conditions could be further relaxed. See the X86 target for a
1868   // related approach and more information.
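  // As an illustrative example, two back-to-back selects on the same
  // (LHS, RHS, CC) share a single conditional branch out of HeadMBB and each
  // become one PHI node in TailMBB.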
1869   Register LHS = MI.getOperand(1).getReg();
1870   Register RHS = MI.getOperand(2).getReg();
1871   auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
1872 
1873   SmallVector<MachineInstr *, 4> SelectDebugValues;
1874   SmallSet<Register, 4> SelectDests;
1875   SelectDests.insert(MI.getOperand(0).getReg());
1876 
1877   MachineInstr *LastSelectPseudo = &MI;
1878 
1879   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
1880        SequenceMBBI != E; ++SequenceMBBI) {
1881     if (SequenceMBBI->isDebugInstr())
1882       continue;
1883     else if (isSelectPseudo(*SequenceMBBI)) {
1884       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
1885           SequenceMBBI->getOperand(2).getReg() != RHS ||
1886           SequenceMBBI->getOperand(3).getImm() != CC ||
1887           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
1888           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
1889         break;
1890       LastSelectPseudo = &*SequenceMBBI;
1891       SequenceMBBI->collectDebugValues(SelectDebugValues);
1892       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
1893     } else {
1894       if (SequenceMBBI->hasUnmodeledSideEffects() ||
1895           SequenceMBBI->mayLoadOrStore())
1896         break;
1897       if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
1898             return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
1899           }))
1900         break;
1901     }
1902   }
1903 
1904   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
1905   const BasicBlock *LLVM_BB = BB->getBasicBlock();
1906   DebugLoc DL = MI.getDebugLoc();
1907   MachineFunction::iterator I = ++BB->getIterator();
1908 
1909   MachineBasicBlock *HeadMBB = BB;
1910   MachineFunction *F = BB->getParent();
1911   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
1912   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
1913 
1914   F->insert(I, IfFalseMBB);
1915   F->insert(I, TailMBB);
1916 
1917   // Transfer debug instructions associated with the selects to TailMBB.
1918   for (MachineInstr *DebugInstr : SelectDebugValues) {
1919     TailMBB->push_back(DebugInstr->removeFromParent());
1920   }
1921 
1922   // Move all instructions after the sequence to TailMBB.
1923   TailMBB->splice(TailMBB->end(), HeadMBB,
1924                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
1925   // Update machine-CFG edges by transferring all successors of the current
1926   // block to the new block which will contain the Phi nodes for the selects.
1927   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
1928   // Set the successors for HeadMBB.
1929   HeadMBB->addSuccessor(IfFalseMBB);
1930   HeadMBB->addSuccessor(TailMBB);
1931 
1932   // Insert appropriate branch.
1933   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
1934 
1935   BuildMI(HeadMBB, DL, TII.get(Opcode))
1936     .addReg(LHS)
1937     .addReg(RHS)
1938     .addMBB(TailMBB);
1939 
1940   // IfFalseMBB just falls through to TailMBB.
1941   IfFalseMBB->addSuccessor(TailMBB);
1942 
1943   // Create PHIs for all of the select pseudo-instructions.
1944   auto SelectMBBI = MI.getIterator();
1945   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
1946   auto InsertionPoint = TailMBB->begin();
1947   while (SelectMBBI != SelectEnd) {
1948     auto Next = std::next(SelectMBBI);
1949     if (isSelectPseudo(*SelectMBBI)) {
1950       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
1951       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
1952               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
1953           .addReg(SelectMBBI->getOperand(4).getReg())
1954           .addMBB(HeadMBB)
1955           .addReg(SelectMBBI->getOperand(5).getReg())
1956           .addMBB(IfFalseMBB);
1957       SelectMBBI->eraseFromParent();
1958     }
1959     SelectMBBI = Next;
1960   }
1961 
1962   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
1963   return TailMBB;
1964 }
1965 
1966 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
1967                                     int VLIndex, unsigned SEWIndex,
1968                                     unsigned VLMul) {
1969   MachineFunction &MF = *BB->getParent();
1970   DebugLoc DL = MI.getDebugLoc();
1971   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1972 
1973   unsigned SEW = MI.getOperand(SEWIndex).getImm();
1974   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
1975   RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));
1976 
1977   // LMUL should already be encoded correctly.
1978   RISCVVLMUL Multiplier = static_cast<RISCVVLMUL>(VLMul);
1979 
1980   MachineRegisterInfo &MRI = MF.getRegInfo();
1981 
1982   // VL and VTYPE are alive here.
1983   MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI));
1984 
1985   if (VLIndex >= 0) {
1986     // Set VL (rs1 != X0).
1987     Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
1988     MIB.addReg(DestReg, RegState::Define | RegState::Dead)
1989         .addReg(MI.getOperand(VLIndex).getReg());
1990   } else
    // With no VL operand in the pseudo, do not modify VL (rd = X0, rs1 = X0).
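    // (This is the vsetvli rd=x0, rs1=x0 form, which only updates VTYPE and
    // leaves the current VL unchanged.)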
1992     MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead)
1993         .addReg(RISCV::X0, RegState::Kill);
1994 
1995   // For simplicity we reuse the vtype representation here.
1996   MIB.addImm(RISCVVType::encodeVTYPE(Multiplier, ElementWidth,
1997                                      /*TailAgnostic*/ true,
1998                                      /*MaskAgnostic*/ false));
1999 
2000   // Remove (now) redundant operands from pseudo
2001   MI.getOperand(SEWIndex).setImm(-1);
2002   if (VLIndex >= 0) {
2003     MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
2004     MI.getOperand(VLIndex).setIsKill(false);
2005   }
2006 
2007   return BB;
2008 }
2009 
2010 MachineBasicBlock *
2011 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2012                                                  MachineBasicBlock *BB) const {
2013 
2014   if (const RISCVVPseudosTable::PseudoInfo *RVV =
2015           RISCVVPseudosTable::getPseudoInfo(MI.getOpcode())) {
2016     int VLIndex = RVV->getVLIndex();
2017     int SEWIndex = RVV->getSEWIndex();
2018 
2019     assert(SEWIndex >= 0 && "SEWIndex must be >= 0");
2020     return addVSetVL(MI, BB, VLIndex, SEWIndex, RVV->VLMul);
2021   }
2022 
2023   switch (MI.getOpcode()) {
2024   default:
2025     llvm_unreachable("Unexpected instr type to insert");
2026   case RISCV::ReadCycleWide:
2027     assert(!Subtarget.is64Bit() &&
           "ReadCycleWide is only to be used on riscv32");
2029     return emitReadCycleWidePseudo(MI, BB);
2030   case RISCV::Select_GPR_Using_CC_GPR:
2031   case RISCV::Select_FPR16_Using_CC_GPR:
2032   case RISCV::Select_FPR32_Using_CC_GPR:
2033   case RISCV::Select_FPR64_Using_CC_GPR:
2034     return emitSelectPseudo(MI, BB);
2035   case RISCV::BuildPairF64Pseudo:
2036     return emitBuildPairF64Pseudo(MI, BB);
2037   case RISCV::SplitF64Pseudo:
2038     return emitSplitF64Pseudo(MI, BB);
2039   }
2040 }
2041 
2042 // Calling Convention Implementation.
2043 // The expectations for frontend ABI lowering vary from target to target.
2044 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
2045 // details, but this is a longer term goal. For now, we simply try to keep the
2046 // role of the frontend as simple and well-defined as possible. The rules can
2047 // be summarised as:
2048 // * Never split up large scalar arguments. We handle them here.
2049 // * If a hardfloat calling convention is being used, and the struct may be
2050 // passed in a pair of registers (fp+fp, int+fp), and both registers are
2051 // available, then pass as two separate arguments. If either the GPRs or FPRs
2052 // are exhausted, then pass according to the rule below.
2053 // * If a struct could never be passed in registers or directly in a stack
2054 // slot (as it is larger than 2*XLEN and the floating point rules don't
2055 // apply), then pass it using a pointer with the byval attribute.
// * If a struct is 2*XLEN or smaller, then coerce to either a two-element
// word-sized array or a 2*XLEN scalar (depending on alignment).
2058 // * The frontend can determine whether a struct is returned by reference or
2059 // not based on its size and fields. If it will be returned by reference, the
2060 // frontend must modify the prototype so a pointer with the sret annotation is
2061 // passed as the first argument. This is not necessary for large scalar
2062 // returns.
2063 // * Struct return values and varargs should be coerced to structs containing
2064 // register-size fields in the same situations they would be for fixed
2065 // arguments.
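//
// As an illustrative example (assuming the ilp32d hard-float ABI): a struct
// containing one double and one int is passed as a separate f64 in an FPR and
// an i32 in a GPR while both register classes still have argument registers
// free, and is passed according to the remaining rules once either register
// class is exhausted.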
2066 
2067 static const MCPhysReg ArgGPRs[] = {
2068   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
2069   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
2070 };
2071 static const MCPhysReg ArgFPR16s[] = {
2072   RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
2073   RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
2074 };
2075 static const MCPhysReg ArgFPR32s[] = {
2076   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
2077   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
2078 };
2079 static const MCPhysReg ArgFPR64s[] = {
2080   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
2081   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
2082 };
2083 // This is an interim calling convention and it may be changed in the future.
2084 static const MCPhysReg ArgVRs[] = {
2085   RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, RISCV::V20,
2086   RISCV::V21, RISCV::V22, RISCV::V23
2087 };
2088 static const MCPhysReg ArgVRM2s[] = {
2089   RISCV::V16M2, RISCV::V18M2, RISCV::V20M2, RISCV::V22M2
2090 };
2091 static const MCPhysReg ArgVRM4s[] = {RISCV::V16M4, RISCV::V20M4};
2092 static const MCPhysReg ArgVRM8s[] = {RISCV::V16M8};
2093 
2094 // Pass a 2*XLEN argument that has been split into two XLEN values through
2095 // registers or the stack as necessary.
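// For example, on RV32 an i64 argument arrives here as two i32 halves; they
// may end up in two GPRs, in the last remaining GPR plus a stack slot, or
// entirely on the stack, depending on how many argument GPRs are left.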
2096 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
2097                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
2098                                 MVT ValVT2, MVT LocVT2,
2099                                 ISD::ArgFlagsTy ArgFlags2) {
2100   unsigned XLenInBytes = XLen / 8;
2101   if (Register Reg = State.AllocateReg(ArgGPRs)) {
2102     // At least one half can be passed via register.
2103     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
2104                                      VA1.getLocVT(), CCValAssign::Full));
2105   } else {
2106     // Both halves must be passed on the stack, with proper alignment.
2107     Align StackAlign =
2108         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
2109     State.addLoc(
2110         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
2111                             State.AllocateStack(XLenInBytes, StackAlign),
2112                             VA1.getLocVT(), CCValAssign::Full));
2113     State.addLoc(CCValAssign::getMem(
2114         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
2115         LocVT2, CCValAssign::Full));
2116     return false;
2117   }
2118 
2119   if (Register Reg = State.AllocateReg(ArgGPRs)) {
2120     // The second half can also be passed via register.
2121     State.addLoc(
2122         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
2123   } else {
2124     // The second half is passed via the stack, without additional alignment.
2125     State.addLoc(CCValAssign::getMem(
2126         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
2127         LocVT2, CCValAssign::Full));
2128   }
2129 
2130   return false;
2131 }
2132 
2133 // Implements the RISC-V calling convention. Returns true upon failure.
2134 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
2135                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
2136                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
2137                      bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
2138                      Optional<unsigned> FirstMaskArgument) {
2139   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
2140   assert(XLen == 32 || XLen == 64);
2141   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
2142 
  // Any return value split into more than two values can't be returned
  // directly.
2145   if (IsRet && ValNo > 1)
2146     return true;
2147 
  // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if
  // passing a variadic argument, or if no F16/F32 argument registers are
  // available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 is true if targeting soft-float ABIs or an FLEN=32 ABI, if
  // passing a variadic argument, or if no F64 argument registers are
  // available.
  bool UseGPRForF64 = true;
2154 
2155   switch (ABI) {
2156   default:
2157     llvm_unreachable("Unexpected ABI");
2158   case RISCVABI::ABI_ILP32:
2159   case RISCVABI::ABI_LP64:
2160     break;
2161   case RISCVABI::ABI_ILP32F:
2162   case RISCVABI::ABI_LP64F:
2163     UseGPRForF16_F32 = !IsFixed;
2164     break;
2165   case RISCVABI::ABI_ILP32D:
2166   case RISCVABI::ABI_LP64D:
2167     UseGPRForF16_F32 = !IsFixed;
2168     UseGPRForF64 = !IsFixed;
2169     break;
2170   }
2171 
2172   // FPR16, FPR32, and FPR64 alias each other.
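  // Because ArgFPR16s, ArgFPR32s and ArgFPR64s all name views of the same
  // eight physical registers (f10-f17), checking ArgFPR32s alone is enough to
  // know whether any FP argument registers remain.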
2173   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
2174     UseGPRForF16_F32 = true;
2175     UseGPRForF64 = true;
2176   }
2177 
2178   // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
2179   // similar local variables rather than directly checking against the target
2180   // ABI.
2181 
2182   if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
2183     LocVT = XLenVT;
2184     LocInfo = CCValAssign::BCvt;
2185   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
2186     LocVT = MVT::i64;
2187     LocInfo = CCValAssign::BCvt;
2188   }
2189 
2190   // If this is a variadic argument, the RISC-V calling convention requires
2191   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
2192   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
2193   // be used regardless of whether the original argument was split during
2194   // legalisation or not. The argument will not be passed by registers if the
2195   // original type is larger than 2*XLEN, so the register alignment rule does
2196   // not apply.
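  // For example, a variadic double passed on RV32 (8 bytes in size, 8-byte
  // aligned) that would otherwise start in an odd-numbered argument GPR is
  // instead placed in the next even-numbered one.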
2197   unsigned TwoXLenInBytes = (2 * XLen) / 8;
2198   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
2199       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
2200     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
2201     // Skip 'odd' register if necessary.
2202     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
2203       State.AllocateReg(ArgGPRs);
2204   }
2205 
2206   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
2207   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
2208       State.getPendingArgFlags();
2209 
2210   assert(PendingLocs.size() == PendingArgFlags.size() &&
2211          "PendingLocs and PendingArgFlags out of sync");
2212 
2213   // Handle passing f64 on RV32D with a soft float ABI or when floating point
2214   // registers are exhausted.
2215   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
2216     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
2217            "Can't lower f64 if it is split");
    // Depending on available argument GPRs, f64 may be passed in a pair of
2219     // GPRs, split between a GPR and the stack, or passed completely on the
2220     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
2221     // cases.
2222     Register Reg = State.AllocateReg(ArgGPRs);
2223     LocVT = MVT::i32;
2224     if (!Reg) {
2225       unsigned StackOffset = State.AllocateStack(8, Align(8));
2226       State.addLoc(
2227           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
2228       return false;
2229     }
2230     if (!State.AllocateReg(ArgGPRs))
2231       State.AllocateStack(4, Align(4));
2232     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2233     return false;
2234   }
2235 
2236   // Split arguments might be passed indirectly, so keep track of the pending
2237   // values.
2238   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
2239     LocVT = XLenVT;
2240     LocInfo = CCValAssign::Indirect;
2241     PendingLocs.push_back(
2242         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
2243     PendingArgFlags.push_back(ArgFlags);
2244     if (!ArgFlags.isSplitEnd()) {
2245       return false;
2246     }
2247   }
2248 
2249   // If the split argument only had two elements, it should be passed directly
2250   // in registers or on the stack.
2251   if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
2252     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
2253     // Apply the normal calling convention rules to the first half of the
2254     // split argument.
2255     CCValAssign VA = PendingLocs[0];
2256     ISD::ArgFlagsTy AF = PendingArgFlags[0];
2257     PendingLocs.clear();
2258     PendingArgFlags.clear();
2259     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
2260                                ArgFlags);
2261   }
2262 
2263   // Allocate to a register if possible, or else a stack slot.
2264   Register Reg;
2265   if (ValVT == MVT::f16 && !UseGPRForF16_F32)
2266     Reg = State.AllocateReg(ArgFPR16s);
2267   else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
2268     Reg = State.AllocateReg(ArgFPR32s);
2269   else if (ValVT == MVT::f64 && !UseGPRForF64)
2270     Reg = State.AllocateReg(ArgFPR64s);
2271   else if (ValVT.isScalableVector()) {
2272     const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
2273     if (RC == &RISCV::VRRegClass) {
2274       // Assign the first mask argument to V0.
2275       // This is an interim calling convention and it may be changed in the
2276       // future.
2277       if (FirstMaskArgument.hasValue() &&
2278           ValNo == FirstMaskArgument.getValue()) {
2279         Reg = State.AllocateReg(RISCV::V0);
2280       } else {
2281         Reg = State.AllocateReg(ArgVRs);
2282       }
2283     } else if (RC == &RISCV::VRM2RegClass) {
2284       Reg = State.AllocateReg(ArgVRM2s);
2285     } else if (RC == &RISCV::VRM4RegClass) {
2286       Reg = State.AllocateReg(ArgVRM4s);
2287     } else if (RC == &RISCV::VRM8RegClass) {
2288       Reg = State.AllocateReg(ArgVRM8s);
2289     } else {
      llvm_unreachable("Unhandled register class for ValueType");
2291     }
2292     if (!Reg) {
2293       LocInfo = CCValAssign::Indirect;
2294       // Try using a GPR to pass the address
2295       Reg = State.AllocateReg(ArgGPRs);
2296       LocVT = XLenVT;
2297     }
2298   } else
2299     Reg = State.AllocateReg(ArgGPRs);
2300   unsigned StackOffset =
2301       Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));
2302 
2303   // If we reach this point and PendingLocs is non-empty, we must be at the
2304   // end of a split argument that must be passed indirectly.
2305   if (!PendingLocs.empty()) {
2306     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
2307     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
2308 
2309     for (auto &It : PendingLocs) {
2310       if (Reg)
2311         It.convertToReg(Reg);
2312       else
2313         It.convertToMem(StackOffset);
2314       State.addLoc(It);
2315     }
2316     PendingLocs.clear();
2317     PendingArgFlags.clear();
2318     return false;
2319   }
2320 
  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) &&
         "Expected an XLenVT or scalable vector type at this stage");
2324 
2325   if (Reg) {
2326     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2327     return false;
2328   }
2329 
2330   // When a floating-point value is passed on the stack, no bit-conversion is
2331   // needed.
2332   if (ValVT.isFloatingPoint()) {
2333     LocVT = ValVT;
2334     LocInfo = CCValAssign::Full;
2335   }
2336   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
2337   return false;
2338 }
2339 
2340 template <typename ArgTy>
2341 static void preAssignMask(const ArgTy &Args,
2342                           Optional<unsigned> &FirstMaskArgument,
2343                           CCState &CCInfo) {
2344   unsigned NumArgs = Args.size();
2345   for (unsigned I = 0; I != NumArgs; ++I) {
2346     MVT ArgVT = Args[I].VT;
2347     if (!ArgVT.isScalableVector() ||
2348         ArgVT.getVectorElementType().SimpleTy != MVT::i1)
2349       continue;
2350 
2351     FirstMaskArgument = I;
2352     break;
2353   }
2354 }
2355 
2356 void RISCVTargetLowering::analyzeInputArgs(
2357     MachineFunction &MF, CCState &CCInfo,
2358     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
2359   unsigned NumArgs = Ins.size();
2360   FunctionType *FType = MF.getFunction().getFunctionType();
2361 
2362   Optional<unsigned> FirstMaskArgument;
2363   if (Subtarget.hasStdExtV())
2364     preAssignMask(Ins, FirstMaskArgument, CCInfo);
2365 
2366   for (unsigned i = 0; i != NumArgs; ++i) {
2367     MVT ArgVT = Ins[i].VT;
2368     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
2369 
2370     Type *ArgTy = nullptr;
2371     if (IsRet)
2372       ArgTy = FType->getReturnType();
2373     else if (Ins[i].isOrigArg())
2374       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
2375 
2376     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
2377     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
2378                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
2379                  FirstMaskArgument)) {
2380       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
2381                         << EVT(ArgVT).getEVTString() << '\n');
2382       llvm_unreachable(nullptr);
2383     }
2384   }
2385 }
2386 
2387 void RISCVTargetLowering::analyzeOutputArgs(
2388     MachineFunction &MF, CCState &CCInfo,
2389     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
2390     CallLoweringInfo *CLI) const {
2391   unsigned NumArgs = Outs.size();
2392 
2393   Optional<unsigned> FirstMaskArgument;
2394   if (Subtarget.hasStdExtV())
2395     preAssignMask(Outs, FirstMaskArgument, CCInfo);
2396 
2397   for (unsigned i = 0; i != NumArgs; i++) {
2398     MVT ArgVT = Outs[i].VT;
2399     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
2400     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
2401 
2402     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
2403     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
2404                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
2405                  FirstMaskArgument)) {
2406       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
2407                         << EVT(ArgVT).getEVTString() << "\n");
2408       llvm_unreachable(nullptr);
2409     }
2410   }
2411 }
2412 
2413 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
2414 // values.
2415 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
2416                                    const CCValAssign &VA, const SDLoc &DL) {
2417   switch (VA.getLocInfo()) {
2418   default:
2419     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2420   case CCValAssign::Full:
2421     break;
2422   case CCValAssign::BCvt:
2423     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
2424       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
2425     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
2426       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
2427     else
2428       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
2429     break;
2430   }
2431   return Val;
2432 }
2433 
2434 // The caller is responsible for loading the full value if the argument is
2435 // passed with CCValAssign::Indirect.
2436 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
2437                                 const CCValAssign &VA, const SDLoc &DL,
2438                                 const RISCVTargetLowering &TLI) {
2439   MachineFunction &MF = DAG.getMachineFunction();
2440   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2441   EVT LocVT = VA.getLocVT();
2442   SDValue Val;
2443   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
2444   Register VReg = RegInfo.createVirtualRegister(RC);
2445   RegInfo.addLiveIn(VA.getLocReg(), VReg);
2446   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2447 
2448   if (VA.getLocInfo() == CCValAssign::Indirect)
2449     return Val;
2450 
2451   return convertLocVTToValVT(DAG, Val, VA, DL);
2452 }
2453 
2454 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
2455                                    const CCValAssign &VA, const SDLoc &DL) {
2456   EVT LocVT = VA.getLocVT();
2457 
2458   switch (VA.getLocInfo()) {
2459   default:
2460     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2461   case CCValAssign::Full:
2462     break;
2463   case CCValAssign::BCvt:
2464     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
2465       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
2466     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
2467       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
2468     else
2469       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
2470     break;
2471   }
2472   return Val;
2473 }
2474 
2475 // The caller is responsible for loading the full value if the argument is
2476 // passed with CCValAssign::Indirect.
2477 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
2478                                 const CCValAssign &VA, const SDLoc &DL) {
2479   MachineFunction &MF = DAG.getMachineFunction();
2480   MachineFrameInfo &MFI = MF.getFrameInfo();
2481   EVT LocVT = VA.getLocVT();
2482   EVT ValVT = VA.getValVT();
2483   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
2484   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
2485                                  VA.getLocMemOffset(), /*Immutable=*/true);
2486   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2487   SDValue Val;
2488 
2489   ISD::LoadExtType ExtType;
2490   switch (VA.getLocInfo()) {
2491   default:
2492     llvm_unreachable("Unexpected CCValAssign::LocInfo");
2493   case CCValAssign::Full:
2494   case CCValAssign::Indirect:
2495   case CCValAssign::BCvt:
2496     ExtType = ISD::NON_EXTLOAD;
2497     break;
2498   }
2499   Val = DAG.getExtLoad(
2500       ExtType, DL, LocVT, Chain, FIN,
2501       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
2502   return Val;
2503 }
2504 
2505 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
2506                                        const CCValAssign &VA, const SDLoc &DL) {
2507   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
2508          "Unexpected VA");
2509   MachineFunction &MF = DAG.getMachineFunction();
2510   MachineFrameInfo &MFI = MF.getFrameInfo();
2511   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2512 
2513   if (VA.isMemLoc()) {
2514     // f64 is passed on the stack.
2515     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
2516     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
2517     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
2518                        MachinePointerInfo::getFixedStack(MF, FI));
2519   }
2520 
2521   assert(VA.isRegLoc() && "Expected register VA assignment");
2522 
2523   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
2524   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
2525   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
2526   SDValue Hi;
2527   if (VA.getLocReg() == RISCV::X17) {
2528     // Second half of f64 is passed on the stack.
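    // (X17 is the last GPR argument register, so CC_RISCV could not assign a
    // register to the high half and allocated a stack slot for it instead.)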
2529     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
2530     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
2531     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
2532                      MachinePointerInfo::getFixedStack(MF, FI));
2533   } else {
2534     // Second half of f64 is passed in another GPR.
2535     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
2536     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
2537     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
2538   }
2539   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
2540 }
2541 
// FastCC gives less than a 1% performance improvement on some particular
// benchmarks, but in theory it may benefit other cases.
2544 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
2545                             CCValAssign::LocInfo LocInfo,
2546                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
2547 
2548   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
2549     // X5 and X6 might be used for save-restore libcall.
2550     static const MCPhysReg GPRList[] = {
2551         RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
2552         RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
2553         RISCV::X29, RISCV::X30, RISCV::X31};
2554     if (unsigned Reg = State.AllocateReg(GPRList)) {
2555       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2556       return false;
2557     }
2558   }
2559 
2560   if (LocVT == MVT::f16) {
2561     static const MCPhysReg FPR16List[] = {
2562         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
2563         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
2564         RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
2565         RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
2566     if (unsigned Reg = State.AllocateReg(FPR16List)) {
2567       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2568       return false;
2569     }
2570   }
2571 
2572   if (LocVT == MVT::f32) {
2573     static const MCPhysReg FPR32List[] = {
2574         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
2575         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
2576         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
2577         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
2578     if (unsigned Reg = State.AllocateReg(FPR32List)) {
2579       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2580       return false;
2581     }
2582   }
2583 
2584   if (LocVT == MVT::f64) {
2585     static const MCPhysReg FPR64List[] = {
2586         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
2587         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
2588         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
2589         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
2590     if (unsigned Reg = State.AllocateReg(FPR64List)) {
2591       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2592       return false;
2593     }
2594   }
2595 
2596   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
2597     unsigned Offset4 = State.AllocateStack(4, Align(4));
2598     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
2599     return false;
2600   }
2601 
2602   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
2603     unsigned Offset5 = State.AllocateStack(8, Align(8));
2604     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
2605     return false;
2606   }
2607 
2608   return true; // CC didn't match.
2609 }
2610 
2611 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
2612                          CCValAssign::LocInfo LocInfo,
2613                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
2614 
2615   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
2616     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
2617     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
2618     static const MCPhysReg GPRList[] = {
2619         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
2620         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
2621     if (unsigned Reg = State.AllocateReg(GPRList)) {
2622       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2623       return false;
2624     }
2625   }
2626 
2627   if (LocVT == MVT::f32) {
2628     // Pass in STG registers: F1, ..., F6
2629     //                        fs0 ... fs5
2630     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
2631                                           RISCV::F18_F, RISCV::F19_F,
2632                                           RISCV::F20_F, RISCV::F21_F};
2633     if (unsigned Reg = State.AllocateReg(FPR32List)) {
2634       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2635       return false;
2636     }
2637   }
2638 
2639   if (LocVT == MVT::f64) {
2640     // Pass in STG registers: D1, ..., D6
2641     //                        fs6 ... fs11
2642     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
2643                                           RISCV::F24_D, RISCV::F25_D,
2644                                           RISCV::F26_D, RISCV::F27_D};
2645     if (unsigned Reg = State.AllocateReg(FPR64List)) {
2646       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
2647       return false;
2648     }
2649   }
2650 
2651   report_fatal_error("No registers left in GHC calling convention");
2652   return true;
2653 }
2654 
2655 // Transform physical registers into virtual registers.
2656 SDValue RISCVTargetLowering::LowerFormalArguments(
2657     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2658     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2659     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2660 
2661   MachineFunction &MF = DAG.getMachineFunction();
2662 
2663   switch (CallConv) {
2664   default:
2665     report_fatal_error("Unsupported calling convention");
2666   case CallingConv::C:
2667   case CallingConv::Fast:
2668     break;
2669   case CallingConv::GHC:
2670     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
2671         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
2672       report_fatal_error(
2673         "GHC calling convention requires the F and D instruction set extensions");
2674   }
2675 
2676   const Function &Func = MF.getFunction();
2677   if (Func.hasFnAttribute("interrupt")) {
2678     if (!Func.arg_empty())
2679       report_fatal_error(
2680         "Functions with the interrupt attribute cannot have arguments!");
2681 
2682     StringRef Kind =
2683       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
2684 
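    // Kind is the attribute's string value; for illustration, a handler
    // declared in C as __attribute__((interrupt("machine"))) typically
    // reaches this point with Kind == "machine".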
2685     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
2686       report_fatal_error(
2687         "Function interrupt attribute argument not supported!");
2688   }
2689 
2690   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2691   MVT XLenVT = Subtarget.getXLenVT();
2692   unsigned XLenInBytes = Subtarget.getXLen() / 8;
2693   // Used with varargs to accumulate store chains.
2694   std::vector<SDValue> OutChains;
2695 
2696   // Assign locations to all of the incoming arguments.
2697   SmallVector<CCValAssign, 16> ArgLocs;
2698   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2699 
2700   if (CallConv == CallingConv::Fast)
2701     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
2702   else if (CallConv == CallingConv::GHC)
2703     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
2704   else
2705     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
2706 
2707   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2708     CCValAssign &VA = ArgLocs[i];
2709     SDValue ArgValue;
2710     // Passing f64 on RV32D with a soft float ABI must be handled as a special
2711     // case.
2712     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
2713       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
2714     else if (VA.isRegLoc())
2715       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
2716     else
2717       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
2718 
2719     if (VA.getLocInfo() == CCValAssign::Indirect) {
2720       // If the original argument was split and passed by reference (e.g. i128
2721       // on RV32), we need to load all parts of it here (using the same
2722       // address).
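      // For example, an i128 argument on RV32 is split into four i32 parts
      // that share one OrigArgIndex; the first part is loaded from the
      // incoming pointer itself and the remaining parts from PartOffset bytes
      // beyond it (typically offsets 4, 8 and 12).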
2723       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2724                                    MachinePointerInfo()));
2725       unsigned ArgIndex = Ins[i].OrigArgIndex;
2726       assert(Ins[i].PartOffset == 0);
2727       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
2728         CCValAssign &PartVA = ArgLocs[i + 1];
2729         unsigned PartOffset = Ins[i + 1].PartOffset;
2730         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2731                                       DAG.getIntPtrConstant(PartOffset, DL));
2732         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2733                                      MachinePointerInfo()));
2734         ++i;
2735       }
2736       continue;
2737     }
2738     InVals.push_back(ArgValue);
2739   }
2740 
2741   if (IsVarArg) {
2742     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
2743     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
2744     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
2745     MachineFrameInfo &MFI = MF.getFrameInfo();
2746     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2747     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
2748 
2749     // Offset of the first variable argument from stack pointer, and size of
2750     // the vararg save area. For now, the varargs save area is either zero or
2751     // large enough to hold a0-a7.
2752     int VaArgOffset, VarArgsSaveSize;
2753 
2754     // If all registers are allocated, then all varargs must be passed on the
2755     // stack and we don't need to save any argregs.
2756     if (ArgRegs.size() == Idx) {
2757       VaArgOffset = CCInfo.getNextStackOffset();
2758       VarArgsSaveSize = 0;
2759     } else {
2760       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
2761       VaArgOffset = -VarArgsSaveSize;
2762     }
2763 
2764     // Record the frame index of the first variable argument,
2765     // which is needed when lowering VASTART.
2766     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
2767     RVFI->setVarArgsFrameIndex(FI);
2768 
2769     // If saving an odd number of registers, create an extra stack slot to
2770     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
2771     // that offsets to even-numbered registers remain 2*XLEN-aligned.
2772     if (Idx % 2) {
2773       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
2774       VarArgsSaveSize += XLenInBytes;
2775     }
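    // For illustration, on RV64 with "void f(int a, ...)" only a0 holds a
    // fixed argument, so Idx == 1: a1-a7 are spilled at offsets -56..-8 from
    // the incoming stack pointer, and because Idx is odd an extra XLEN-sized
    // padding slot is created at -64, for a 64-byte save area in total.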
2776 
2777     // Copy the integer registers that may have been used for passing varargs
2778     // to the vararg save area.
2779     for (unsigned I = Idx; I < ArgRegs.size();
2780          ++I, VaArgOffset += XLenInBytes) {
2781       const Register Reg = RegInfo.createVirtualRegister(RC);
2782       RegInfo.addLiveIn(ArgRegs[I], Reg);
2783       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
2784       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
2785       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2786       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
2787                                    MachinePointerInfo::getFixedStack(MF, FI));
2788       cast<StoreSDNode>(Store.getNode())
2789           ->getMemOperand()
2790           ->setValue((Value *)nullptr);
2791       OutChains.push_back(Store);
2792     }
2793     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
2794   }
2795 
2796   // All stores are grouped in one node so that the sizes of Ins and InVals
2797   // still match. This only happens for vararg functions.
2798   if (!OutChains.empty()) {
2799     OutChains.push_back(Chain);
2800     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
2801   }
2802 
2803   return Chain;
2804 }
2805 
2806 /// isEligibleForTailCallOptimization - Check whether the call is eligible
2807 /// for tail call optimization.
2808 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
2809 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
2810     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
2811     const SmallVector<CCValAssign, 16> &ArgLocs) const {
2812 
2813   auto &Callee = CLI.Callee;
2814   auto CalleeCC = CLI.CallConv;
2815   auto &Outs = CLI.Outs;
2816   auto &Caller = MF.getFunction();
2817   auto CallerCC = Caller.getCallingConv();
2818 
2819   // Exception-handling functions need a special set of instructions to
2820   // indicate a return to the hardware. Tail-calling another function would
2821   // probably break this.
2822   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
2823   // should be expanded as new function attributes are introduced.
2824   if (Caller.hasFnAttribute("interrupt"))
2825     return false;
2826 
2827   // Do not tail call opt if the stack is used to pass parameters.
2828   if (CCInfo.getNextStackOffset() != 0)
2829     return false;
2830 
2831   // Do not tail call opt if any parameters need to be passed indirectly.
2832   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
2833   // passed indirectly. So the address of the value will be passed in a
2834   // register, or if not available, then the address is put on the stack.
2835   // Passing indirectly usually also requires allocating stack space to hold
2836   // the value itself, so the CCInfo.getNextStackOffset() != 0 check alone is
2837   // not enough; we must also check whether any CCValAssign in ArgLocs is
2838   // marked CCValAssign::Indirect.
2839   for (auto &VA : ArgLocs)
2840     if (VA.getLocInfo() == CCValAssign::Indirect)
2841       return false;
2842 
2843   // Do not tail call opt if either caller or callee uses struct return
2844   // semantics.
2845   auto IsCallerStructRet = Caller.hasStructRetAttr();
2846   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
2847   if (IsCallerStructRet || IsCalleeStructRet)
2848     return false;
2849 
2850   // Externally-defined functions with weak linkage should not be
2851   // tail-called. The behaviour of branch instructions in this situation (as
2852   // used for tail calls) is implementation-defined, so we cannot rely on the
2853   // linker replacing the tail call with a return.
2854   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2855     const GlobalValue *GV = G->getGlobal();
2856     if (GV->hasExternalWeakLinkage())
2857       return false;
2858   }
2859 
2860   // The callee has to preserve all registers the caller needs to preserve.
2861   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
2862   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2863   if (CalleeCC != CallerCC) {
2864     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2865     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2866       return false;
2867   }
2868 
2869   // Byval parameters hand the function a pointer directly into the stack area
2870   // we want to reuse during a tail call. Working around this *is* possible
2871   // but less efficient and uglier in LowerCall.
2872   for (auto &Arg : Outs)
2873     if (Arg.Flags.isByVal())
2874       return false;
2875 
2876   return true;
2877 }
2878 
2879 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
2880 // and output parameter nodes.
2881 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
2882                                        SmallVectorImpl<SDValue> &InVals) const {
2883   SelectionDAG &DAG = CLI.DAG;
2884   SDLoc &DL = CLI.DL;
2885   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2886   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2887   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2888   SDValue Chain = CLI.Chain;
2889   SDValue Callee = CLI.Callee;
2890   bool &IsTailCall = CLI.IsTailCall;
2891   CallingConv::ID CallConv = CLI.CallConv;
2892   bool IsVarArg = CLI.IsVarArg;
2893   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2894   MVT XLenVT = Subtarget.getXLenVT();
2895 
2896   MachineFunction &MF = DAG.getMachineFunction();
2897 
2898   // Analyze the operands of the call, assigning locations to each operand.
2899   SmallVector<CCValAssign, 16> ArgLocs;
2900   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2901 
2902   if (CallConv == CallingConv::Fast)
2903     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
2904   else if (CallConv == CallingConv::GHC)
2905     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
2906   else
2907     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
2908 
2909   // Check if it's really possible to do a tail call.
2910   if (IsTailCall)
2911     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
2912 
2913   if (IsTailCall)
2914     ++NumTailCalls;
2915   else if (CLI.CB && CLI.CB->isMustTailCall())
2916     report_fatal_error("failed to perform tail call elimination on a call "
2917                        "site marked musttail");
2918 
2919   // Get a count of how many bytes are to be pushed on the stack.
2920   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
2921 
2922   // Create local copies for byval args
2923   SmallVector<SDValue, 8> ByValArgs;
2924   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2925     ISD::ArgFlagsTy Flags = Outs[i].Flags;
2926     if (!Flags.isByVal())
2927       continue;
2928 
2929     SDValue Arg = OutVals[i];
2930     unsigned Size = Flags.getByValSize();
2931     Align Alignment = Flags.getNonZeroByValAlign();
2932 
2933     int FI =
2934         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
2935     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2936     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
2937 
2938     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
2939                           /*IsVolatile=*/false,
2940                           /*AlwaysInline=*/false, IsTailCall,
2941                           MachinePointerInfo(), MachinePointerInfo());
2942     ByValArgs.push_back(FIPtr);
2943   }
2944 
2945   if (!IsTailCall)
2946     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
2947 
2948   // Copy argument values to their designated locations.
2949   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2950   SmallVector<SDValue, 8> MemOpChains;
2951   SDValue StackPtr;
2952   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
2953     CCValAssign &VA = ArgLocs[i];
2954     SDValue ArgValue = OutVals[i];
2955     ISD::ArgFlagsTy Flags = Outs[i].Flags;
2956 
2957     // Handle passing f64 on RV32D with a soft float ABI as a special case.
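    // The f64 is split with RISCVISD::SplitF64; the low half goes in the
    // assigned GPR and the high half either in the following GPR or, if the
    // low half landed in a7 (X17), in the first stack slot (e.g. an f64 may
    // travel in a0/a1, or in a7 plus one word of stack).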
2958     bool IsF64OnRV32DSoftABI =
2959         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
2960     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
2961       SDValue SplitF64 = DAG.getNode(
2962           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
2963       SDValue Lo = SplitF64.getValue(0);
2964       SDValue Hi = SplitF64.getValue(1);
2965 
2966       Register RegLo = VA.getLocReg();
2967       RegsToPass.push_back(std::make_pair(RegLo, Lo));
2968 
2969       if (RegLo == RISCV::X17) {
2970         // Second half of f64 is passed on the stack.
2971         // Work out the address of the stack slot.
2972         if (!StackPtr.getNode())
2973           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
2974         // Emit the store.
2975         MemOpChains.push_back(
2976             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
2977       } else {
2978         // Second half of f64 is passed in another GPR.
2979         assert(RegLo < RISCV::X31 && "Invalid register pair");
2980         Register RegHigh = RegLo + 1;
2981         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
2982       }
2983       continue;
2984     }
2985 
2986     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
2987     // as any other MemLoc.
2988 
2989     // Promote the value if needed.
2990     // For now, only handle fully promoted and indirect arguments.
2991     if (VA.getLocInfo() == CCValAssign::Indirect) {
2992       // Store the argument in a stack slot and pass its address.
2993       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
2994       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2995       MemOpChains.push_back(
2996           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2997                        MachinePointerInfo::getFixedStack(MF, FI)));
2998       // If the original argument was split (e.g. i128), we need
2999       // to store all parts of it here (and pass just one address).
3000       unsigned ArgIndex = Outs[i].OrigArgIndex;
3001       assert(Outs[i].PartOffset == 0);
3002       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
3003         SDValue PartValue = OutVals[i + 1];
3004         unsigned PartOffset = Outs[i + 1].PartOffset;
3005         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
3006                                       DAG.getIntPtrConstant(PartOffset, DL));
3007         MemOpChains.push_back(
3008             DAG.getStore(Chain, DL, PartValue, Address,
3009                          MachinePointerInfo::getFixedStack(MF, FI)));
3010         ++i;
3011       }
3012       ArgValue = SpillSlot;
3013     } else {
3014       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
3015     }
3016 
3017     // Use local copy if it is a byval arg.
3018     if (Flags.isByVal())
3019       ArgValue = ByValArgs[j++];
3020 
3021     if (VA.isRegLoc()) {
3022       // Queue up the argument copies and emit them at the end.
3023       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
3024     } else {
3025       assert(VA.isMemLoc() && "Argument not register or memory");
3026       assert(!IsTailCall && "Tail call not allowed if stack is used "
3027                             "for passing parameters");
3028 
3029       // Work out the address of the stack slot.
3030       if (!StackPtr.getNode())
3031         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
3032       SDValue Address =
3033           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
3034                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
3035 
3036       // Emit the store.
3037       MemOpChains.push_back(
3038           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
3039     }
3040   }
3041 
3042   // Join the stores, which are independent of one another.
3043   if (!MemOpChains.empty())
3044     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
3045 
3046   SDValue Glue;
3047 
3048   // Build a sequence of copy-to-reg nodes, chained and glued together.
3049   for (auto &Reg : RegsToPass) {
3050     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
3051     Glue = Chain.getValue(1);
3052   }
3053 
3054   // Validate that none of the argument registers have been marked as
3055   // reserved; if any have been, report an error. Do the same for the return
3056   // address if this is not a tail call.
3057   validateCCReservedRegs(RegsToPass, MF);
3058   if (!IsTailCall &&
3059       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
3060     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
3061         MF.getFunction(),
3062         "Return address register required, but has been reserved."});
3063 
3064   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
3065   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
3066   // split it and then direct call can be matched by PseudoCALL.
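  // For illustration, a dso-local callee is emitted as "call foo" (MO_CALL)
  // while a preemptible one becomes "call foo@plt" (MO_PLT); both are later
  // expanded from PseudoCALL into an auipc+jalr pair.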
3067   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
3068     const GlobalValue *GV = S->getGlobal();
3069 
3070     unsigned OpFlags = RISCVII::MO_CALL;
3071     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
3072       OpFlags = RISCVII::MO_PLT;
3073 
3074     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
3075   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3076     unsigned OpFlags = RISCVII::MO_CALL;
3077 
3078     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
3079                                                  nullptr))
3080       OpFlags = RISCVII::MO_PLT;
3081 
3082     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
3083   }
3084 
3085   // The first call operand is the chain and the second is the target address.
3086   SmallVector<SDValue, 8> Ops;
3087   Ops.push_back(Chain);
3088   Ops.push_back(Callee);
3089 
3090   // Add argument registers to the end of the list so that they are
3091   // known live into the call.
3092   for (auto &Reg : RegsToPass)
3093     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
3094 
3095   if (!IsTailCall) {
3096     // Add a register mask operand representing the call-preserved registers.
3097     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3098     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
3099     assert(Mask && "Missing call preserved mask for calling convention");
3100     Ops.push_back(DAG.getRegisterMask(Mask));
3101   }
3102 
3103   // Glue the call to the argument copies, if any.
3104   if (Glue.getNode())
3105     Ops.push_back(Glue);
3106 
3107   // Emit the call.
3108   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3109 
3110   if (IsTailCall) {
3111     MF.getFrameInfo().setHasTailCall();
3112     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
3113   }
3114 
3115   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
3116   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
3117   Glue = Chain.getValue(1);
3118 
3119   // Mark the end of the call, which is glued to the call itself.
3120   Chain = DAG.getCALLSEQ_END(Chain,
3121                              DAG.getConstant(NumBytes, DL, PtrVT, true),
3122                              DAG.getConstant(0, DL, PtrVT, true),
3123                              Glue, DL);
3124   Glue = Chain.getValue(1);
3125 
3126   // Assign locations to each value returned by this call.
3127   SmallVector<CCValAssign, 16> RVLocs;
3128   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
3129   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
3130 
3131   // Copy all of the result registers out of their specified physreg.
3132   for (auto &VA : RVLocs) {
3133     // Copy the value out
3134     SDValue RetValue =
3135         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
3136     // Glue the RetValue to the end of the call sequence
3137     Chain = RetValue.getValue(1);
3138     Glue = RetValue.getValue(2);
3139 
3140     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
3141       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
3142       SDValue RetValue2 =
3143           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
3144       Chain = RetValue2.getValue(1);
3145       Glue = RetValue2.getValue(2);
3146       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
3147                              RetValue2);
3148     }
3149 
3150     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
3151 
3152     InVals.push_back(RetValue);
3153   }
3154 
3155   return Chain;
3156 }
3157 
3158 bool RISCVTargetLowering::CanLowerReturn(
3159     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
3160     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3161   SmallVector<CCValAssign, 16> RVLocs;
3162   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
3163 
3164   Optional<unsigned> FirstMaskArgument;
3165   if (Subtarget.hasStdExtV())
3166     preAssignMask(Outs, FirstMaskArgument, CCInfo);
3167 
3168   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3169     MVT VT = Outs[i].VT;
3170     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3171     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
3172     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
3173                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
3174                  *this, FirstMaskArgument))
3175       return false;
3176   }
3177   return true;
3178 }
3179 
3180 SDValue
3181 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3182                                  bool IsVarArg,
3183                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
3184                                  const SmallVectorImpl<SDValue> &OutVals,
3185                                  const SDLoc &DL, SelectionDAG &DAG) const {
3186   const MachineFunction &MF = DAG.getMachineFunction();
3187   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
3188 
3189   // Stores the assignment of the return value to a location.
3190   SmallVector<CCValAssign, 16> RVLocs;
3191 
3192   // Info about the registers and stack slot.
3193   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
3194                  *DAG.getContext());
3195 
3196   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
3197                     nullptr);
3198 
3199   if (CallConv == CallingConv::GHC && !RVLocs.empty())
3200     report_fatal_error("GHC functions return void only");
3201 
3202   SDValue Glue;
3203   SmallVector<SDValue, 4> RetOps(1, Chain);
3204 
3205   // Copy the result values into the output registers.
3206   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
3207     SDValue Val = OutVals[i];
3208     CCValAssign &VA = RVLocs[i];
3209     assert(VA.isRegLoc() && "Can only return in registers!");
3210 
3211     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
3212       // Handle returning f64 on RV32D with a soft float ABI.
3213       assert(VA.isRegLoc() && "Expected return via registers");
3214       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
3215                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
3216       SDValue Lo = SplitF64.getValue(0);
3217       SDValue Hi = SplitF64.getValue(1);
3218       Register RegLo = VA.getLocReg();
3219       assert(RegLo < RISCV::X31 && "Invalid register pair");
3220       Register RegHi = RegLo + 1;
3221 
3222       if (STI.isRegisterReservedByUser(RegLo) ||
3223           STI.isRegisterReservedByUser(RegHi))
3224         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
3225             MF.getFunction(),
3226             "Return value register required, but has been reserved."});
3227 
3228       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
3229       Glue = Chain.getValue(1);
3230       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
3231       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
3232       Glue = Chain.getValue(1);
3233       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
3234     } else {
3235       // Handle a 'normal' return.
3236       Val = convertValVTToLocVT(DAG, Val, VA, DL);
3237       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
3238 
3239       if (STI.isRegisterReservedByUser(VA.getLocReg()))
3240         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
3241             MF.getFunction(),
3242             "Return value register required, but has been reserved."});
3243 
3244       // Guarantee that all emitted copies are stuck together.
3245       Glue = Chain.getValue(1);
3246       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3247     }
3248   }
3249 
3250   RetOps[0] = Chain; // Update chain.
3251 
3252   // Add the glue node if we have it.
3253   if (Glue.getNode()) {
3254     RetOps.push_back(Glue);
3255   }
3256 
3257   // Interrupt service routines use different return instructions.
3258   const Function &Func = DAG.getMachineFunction().getFunction();
3259   if (Func.hasFnAttribute("interrupt")) {
3260     if (!Func.getReturnType()->isVoidTy())
3261       report_fatal_error(
3262           "Functions with the interrupt attribute must have void return type!");
3263 
3264     MachineFunction &MF = DAG.getMachineFunction();
3265     StringRef Kind =
3266       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
3267 
3268     unsigned RetOpc;
3269     if (Kind == "user")
3270       RetOpc = RISCVISD::URET_FLAG;
3271     else if (Kind == "supervisor")
3272       RetOpc = RISCVISD::SRET_FLAG;
3273     else
3274       RetOpc = RISCVISD::MRET_FLAG;
3275 
3276     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
3277   }
3278 
3279   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
3280 }
3281 
3282 void RISCVTargetLowering::validateCCReservedRegs(
3283     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
3284     MachineFunction &MF) const {
3285   const Function &F = MF.getFunction();
3286   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
3287 
3288   if (std::any_of(std::begin(Regs), std::end(Regs), [&STI](auto Reg) {
3289         return STI.isRegisterReservedByUser(Reg.first);
3290       }))
3291     F.getContext().diagnose(DiagnosticInfoUnsupported{
3292         F, "Argument register required, but has been reserved."});
3293 }
3294 
3295 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3296   return CI->isTailCall();
3297 }
3298 
3299 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
3300 #define NODE_NAME_CASE(NODE)                                                   \
3301   case RISCVISD::NODE:                                                         \
3302     return "RISCVISD::" #NODE;
3303   // clang-format off
3304   switch ((RISCVISD::NodeType)Opcode) {
3305   case RISCVISD::FIRST_NUMBER:
3306     break;
3307   NODE_NAME_CASE(RET_FLAG)
3308   NODE_NAME_CASE(URET_FLAG)
3309   NODE_NAME_CASE(SRET_FLAG)
3310   NODE_NAME_CASE(MRET_FLAG)
3311   NODE_NAME_CASE(CALL)
3312   NODE_NAME_CASE(SELECT_CC)
3313   NODE_NAME_CASE(BuildPairF64)
3314   NODE_NAME_CASE(SplitF64)
3315   NODE_NAME_CASE(TAIL)
3316   NODE_NAME_CASE(SLLW)
3317   NODE_NAME_CASE(SRAW)
3318   NODE_NAME_CASE(SRLW)
3319   NODE_NAME_CASE(DIVW)
3320   NODE_NAME_CASE(DIVUW)
3321   NODE_NAME_CASE(REMUW)
3322   NODE_NAME_CASE(ROLW)
3323   NODE_NAME_CASE(RORW)
3324   NODE_NAME_CASE(FSLW)
3325   NODE_NAME_CASE(FSRW)
3326   NODE_NAME_CASE(FMV_H_X)
3327   NODE_NAME_CASE(FMV_X_ANYEXTH)
3328   NODE_NAME_CASE(FMV_W_X_RV64)
3329   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
3330   NODE_NAME_CASE(READ_CYCLE_WIDE)
3331   NODE_NAME_CASE(GREVI)
3332   NODE_NAME_CASE(GREVIW)
3333   NODE_NAME_CASE(GORCI)
3334   NODE_NAME_CASE(GORCIW)
3335   }
3336   // clang-format on
3337   return nullptr;
3338 #undef NODE_NAME_CASE
3339 }
3340 
3341 /// getConstraintType - Given a constraint letter, return the type of
3342 /// constraint it is for this target.
3343 RISCVTargetLowering::ConstraintType
3344 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
3345   if (Constraint.size() == 1) {
3346     switch (Constraint[0]) {
3347     default:
3348       break;
3349     case 'f':
3350       return C_RegisterClass;
3351     case 'I':
3352     case 'J':
3353     case 'K':
3354       return C_Immediate;
3355     case 'A':
3356       return C_Memory;
3357     }
3358   }
3359   return TargetLowering::getConstraintType(Constraint);
3360 }
3361 
3362 std::pair<unsigned, const TargetRegisterClass *>
3363 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3364                                                   StringRef Constraint,
3365                                                   MVT VT) const {
3366   // First, see if this is a constraint that directly corresponds to a
3367   // RISCV register class.
3368   if (Constraint.size() == 1) {
3369     switch (Constraint[0]) {
3370     case 'r':
3371       return std::make_pair(0U, &RISCV::GPRRegClass);
3372     case 'f':
3373       if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
3374         return std::make_pair(0U, &RISCV::FPR16RegClass);
3375       if (Subtarget.hasStdExtF() && VT == MVT::f32)
3376         return std::make_pair(0U, &RISCV::FPR32RegClass);
3377       if (Subtarget.hasStdExtD() && VT == MVT::f64)
3378         return std::make_pair(0U, &RISCV::FPR64RegClass);
3379       break;
3380     default:
3381       break;
3382     }
3383   }
3384 
3385   // Clang will correctly decode the usage of register name aliases into their
3386   // official names. However, other frontends like `rustc` do not. This allows
3387   // users of these frontends to use the ABI names for registers in LLVM-style
3388   // register constraints.
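  // For illustration, an IR-level constraint string such as "={a0},{a1}" is
  // resolved here to X10 and X11 respectively.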
3389   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
3390                                .Case("{zero}", RISCV::X0)
3391                                .Case("{ra}", RISCV::X1)
3392                                .Case("{sp}", RISCV::X2)
3393                                .Case("{gp}", RISCV::X3)
3394                                .Case("{tp}", RISCV::X4)
3395                                .Case("{t0}", RISCV::X5)
3396                                .Case("{t1}", RISCV::X6)
3397                                .Case("{t2}", RISCV::X7)
3398                                .Cases("{s0}", "{fp}", RISCV::X8)
3399                                .Case("{s1}", RISCV::X9)
3400                                .Case("{a0}", RISCV::X10)
3401                                .Case("{a1}", RISCV::X11)
3402                                .Case("{a2}", RISCV::X12)
3403                                .Case("{a3}", RISCV::X13)
3404                                .Case("{a4}", RISCV::X14)
3405                                .Case("{a5}", RISCV::X15)
3406                                .Case("{a6}", RISCV::X16)
3407                                .Case("{a7}", RISCV::X17)
3408                                .Case("{s2}", RISCV::X18)
3409                                .Case("{s3}", RISCV::X19)
3410                                .Case("{s4}", RISCV::X20)
3411                                .Case("{s5}", RISCV::X21)
3412                                .Case("{s6}", RISCV::X22)
3413                                .Case("{s7}", RISCV::X23)
3414                                .Case("{s8}", RISCV::X24)
3415                                .Case("{s9}", RISCV::X25)
3416                                .Case("{s10}", RISCV::X26)
3417                                .Case("{s11}", RISCV::X27)
3418                                .Case("{t3}", RISCV::X28)
3419                                .Case("{t4}", RISCV::X29)
3420                                .Case("{t5}", RISCV::X30)
3421                                .Case("{t6}", RISCV::X31)
3422                                .Default(RISCV::NoRegister);
3423   if (XRegFromAlias != RISCV::NoRegister)
3424     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
3425 
3426   // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
3427   // TableGen record rather than the AsmName to choose registers for InlineAsm
3428   // constraints, and because we want to match those names to the widest
3429   // floating point register type available, manually select FP registers here.
3430   //
3431   // The second case is the ABI name of the register, so that frontends can also
3432   // use the ABI names in register constraint lists.
3433   if (Subtarget.hasStdExtF()) {
3434     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
3435                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
3436                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
3437                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
3438                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
3439                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
3440                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
3441                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
3442                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
3443                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
3444                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
3445                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
3446                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
3447                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
3448                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
3449                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
3450                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
3451                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
3452                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
3453                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
3454                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
3455                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
3456                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
3457                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
3458                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
3459                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
3460                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
3461                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
3462                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
3463                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
3464                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
3465                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
3466                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
3467                         .Default(RISCV::NoRegister);
3468     if (FReg != RISCV::NoRegister) {
3469       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
3470       if (Subtarget.hasStdExtD()) {
3471         unsigned RegNo = FReg - RISCV::F0_F;
3472         unsigned DReg = RISCV::F0_D + RegNo;
3473         return std::make_pair(DReg, &RISCV::FPR64RegClass);
3474       }
3475       return std::make_pair(FReg, &RISCV::FPR32RegClass);
3476     }
3477   }
3478 
3479   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3480 }
3481 
3482 unsigned
3483 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
3484   // Currently only support length 1 constraints.
3485   if (ConstraintCode.size() == 1) {
3486     switch (ConstraintCode[0]) {
3487     case 'A':
3488       return InlineAsm::Constraint_A;
3489     default:
3490       break;
3491     }
3492   }
3493 
3494   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
3495 }
3496 
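// Lower immediate operands for the target-specific constraints. For
// illustration (GCC-style inline asm):
//   asm volatile("addi %0, %1, %2" : "=r"(out) : "r"(in), "I"(42));
// is accepted because 42 fits in a signed 12-bit immediate, whereas 'K' only
// accepts unsigned 5-bit values and 'J' only the constant zero.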
3497 void RISCVTargetLowering::LowerAsmOperandForConstraint(
3498     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
3499     SelectionDAG &DAG) const {
3500   // Currently only support length 1 constraints.
3501   if (Constraint.length() == 1) {
3502     switch (Constraint[0]) {
3503     case 'I':
3504       // Validate & create a 12-bit signed immediate operand.
3505       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3506         uint64_t CVal = C->getSExtValue();
3507         if (isInt<12>(CVal))
3508           Ops.push_back(
3509               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
3510       }
3511       return;
3512     case 'J':
3513       // Validate & create an integer zero operand.
3514       if (auto *C = dyn_cast<ConstantSDNode>(Op))
3515         if (C->getZExtValue() == 0)
3516           Ops.push_back(
3517               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
3518       return;
3519     case 'K':
3520       // Validate & create a 5-bit unsigned immediate operand.
3521       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3522         uint64_t CVal = C->getZExtValue();
3523         if (isUInt<5>(CVal))
3524           Ops.push_back(
3525               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
3526       }
3527       return;
3528     default:
3529       break;
3530     }
3531   }
3532   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
3533 }
3534 
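// The fences emitted below roughly follow the atomics mapping suggested by
// the RISC-V memory model: for illustration, a seq_cst load is lowered as
// "fence rw,rw; l{b|h|w|d}; fence r,rw" and a release (or stronger) store as
// "fence rw,w; s{b|h|w|d}".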
3535 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
3536                                                    Instruction *Inst,
3537                                                    AtomicOrdering Ord) const {
3538   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
3539     return Builder.CreateFence(Ord);
3540   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
3541     return Builder.CreateFence(AtomicOrdering::Release);
3542   return nullptr;
3543 }
3544 
3545 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
3546                                                     Instruction *Inst,
3547                                                     AtomicOrdering Ord) const {
3548   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
3549     return Builder.CreateFence(AtomicOrdering::Acquire);
3550   return nullptr;
3551 }
3552 
3553 TargetLowering::AtomicExpansionKind
3554 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
3555   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
3556   // point operations can't be used in an lr/sc sequence without breaking the
3557   // forward-progress guarantee.
3558   if (AI->isFloatingPointOperation())
3559     return AtomicExpansionKind::CmpXChg;
3560 
3561   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
3562   if (Size == 8 || Size == 16)
3563     return AtomicExpansionKind::MaskedIntrinsic;
3564   return AtomicExpansionKind::None;
3565 }
3566 
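// For sub-word atomics, AtomicExpandPass calls back into
// emitMaskedAtomicRMWIntrinsic below. For illustration, an
// "atomicrmw add i8* %p, i8 %v monotonic" is rewritten as a call to the
// llvm.riscv.masked.atomicrmw.add.i32 intrinsic on the containing aligned
// word, with the operand shifted into position and a mask selecting the byte.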
3567 static Intrinsic::ID
3568 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
3569   if (XLen == 32) {
3570     switch (BinOp) {
3571     default:
3572       llvm_unreachable("Unexpected AtomicRMW BinOp");
3573     case AtomicRMWInst::Xchg:
3574       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
3575     case AtomicRMWInst::Add:
3576       return Intrinsic::riscv_masked_atomicrmw_add_i32;
3577     case AtomicRMWInst::Sub:
3578       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
3579     case AtomicRMWInst::Nand:
3580       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
3581     case AtomicRMWInst::Max:
3582       return Intrinsic::riscv_masked_atomicrmw_max_i32;
3583     case AtomicRMWInst::Min:
3584       return Intrinsic::riscv_masked_atomicrmw_min_i32;
3585     case AtomicRMWInst::UMax:
3586       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
3587     case AtomicRMWInst::UMin:
3588       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
3589     }
3590   }
3591 
3592   if (XLen == 64) {
3593     switch (BinOp) {
3594     default:
3595       llvm_unreachable("Unexpected AtomicRMW BinOp");
3596     case AtomicRMWInst::Xchg:
3597       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
3598     case AtomicRMWInst::Add:
3599       return Intrinsic::riscv_masked_atomicrmw_add_i64;
3600     case AtomicRMWInst::Sub:
3601       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
3602     case AtomicRMWInst::Nand:
3603       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
3604     case AtomicRMWInst::Max:
3605       return Intrinsic::riscv_masked_atomicrmw_max_i64;
3606     case AtomicRMWInst::Min:
3607       return Intrinsic::riscv_masked_atomicrmw_min_i64;
3608     case AtomicRMWInst::UMax:
3609       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
3610     case AtomicRMWInst::UMin:
3611       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
3612     }
3613   }
3614 
3615   llvm_unreachable("Unexpected XLen");
3616 }
3617 
3618 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
3619     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
3620     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
3621   unsigned XLen = Subtarget.getXLen();
3622   Value *Ordering =
3623       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
3624   Type *Tys[] = {AlignedAddr->getType()};
3625   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
3626       AI->getModule(),
3627       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
3628 
3629   if (XLen == 64) {
3630     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
3631     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
3632     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
3633   }
3634 
3635   Value *Result;
3636 
3637   // Must pass the shift amount needed to sign extend the loaded value prior
3638   // to performing a signed comparison for min/max. ShiftAmt is the number of
3639   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
3640   // is the number of bits to left+right shift the value in order to
3641   // sign-extend.
3642   if (AI->getOperation() == AtomicRMWInst::Min ||
3643       AI->getOperation() == AtomicRMWInst::Max) {
3644     const DataLayout &DL = AI->getModule()->getDataLayout();
3645     unsigned ValWidth =
3646         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
3647     Value *SextShamt =
3648         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
3649     Result = Builder.CreateCall(LrwOpScwLoop,
3650                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
3651   } else {
3652     Result =
3653         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
3654   }
3655 
3656   if (XLen == 64)
3657     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
3658   return Result;
3659 }
3660 
3661 TargetLowering::AtomicExpansionKind
3662 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
3663     AtomicCmpXchgInst *CI) const {
3664   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
3665   if (Size == 8 || Size == 16)
3666     return AtomicExpansionKind::MaskedIntrinsic;
3667   return AtomicExpansionKind::None;
3668 }
3669 
3670 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
3671     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
3672     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
3673   unsigned XLen = Subtarget.getXLen();
3674   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
3675   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
3676   if (XLen == 64) {
3677     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
3678     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
3679     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
3680     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
3681   }
3682   Type *Tys[] = {AlignedAddr->getType()};
3683   Function *MaskedCmpXchg =
3684       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
3685   Value *Result = Builder.CreateCall(
3686       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
3687   if (XLen == 64)
3688     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
3689   return Result;
3690 }
3691 
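// The F, D and Zfh extensions each provide fused multiply-add instructions
// (fmadd.s, fmadd.d and fmadd.h), so FMA formation is reported as profitable
// whenever the corresponding extension is available.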
3692 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
3693                                                      EVT VT) const {
3694   VT = VT.getScalarType();
3695 
3696   if (!VT.isSimple())
3697     return false;
3698 
3699   switch (VT.getSimpleVT().SimpleTy) {
3700   case MVT::f16:
3701     return Subtarget.hasStdExtZfh();
3702   case MVT::f32:
3703     return Subtarget.hasStdExtF();
3704   case MVT::f64:
3705     return Subtarget.hasStdExtD();
3706   default:
3707     break;
3708   }
3709 
3710   return false;
3711 }
3712 
3713 Register RISCVTargetLowering::getExceptionPointerRegister(
3714     const Constant *PersonalityFn) const {
3715   return RISCV::X10;
3716 }
3717 
3718 Register RISCVTargetLowering::getExceptionSelectorRegister(
3719     const Constant *PersonalityFn) const {
3720   return RISCV::X11;
3721 }
3722 
3723 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
3724   // Return false to suppress the unnecessary extensions if a libcall argument
3725   // or return value has f32 type under the LP64 ABI.
3726   RISCVABI::ABI ABI = Subtarget.getTargetABI();
3727   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
3728     return false;
3729 
3730   return true;
3731 }
3732 
3733 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
3734                                                  SDValue C) const {
3735   // Check integral scalar types.
3736   if (VT.isScalarInteger()) {
3737     // Do not perform the transformation on riscv32 with the M extension.
3738     if (!Subtarget.is64Bit() && Subtarget.hasStdExtM())
3739       return false;
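    // Constants of the form 2^k + 1, 2^k - 1, 1 - 2^k or -(2^k + 1) can be
    // lowered to a shift plus a single add/sub; for example
    // x * 9 == (x << 3) + x and x * 7 == (x << 3) - x.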
3740     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
3741       if (ConstNode->getAPIntValue().getBitWidth() > 8 * sizeof(int64_t))
3742         return false;
3743       int64_t Imm = ConstNode->getSExtValue();
3744       if (isPowerOf2_64(Imm + 1) || isPowerOf2_64(Imm - 1) ||
3745           isPowerOf2_64(1 - Imm) || isPowerOf2_64(-1 - Imm))
3746         return true;
3747     }
3748   }
3749 
3750   return false;
3751 }
3752 
3753 #define GET_REGISTER_MATCHER
3754 #include "RISCVGenAsmMatcher.inc"
3755 
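// Returns the physical register named RegName, as used by the
// llvm.read_register / llvm.write_register intrinsics and by named register
// global variables (for illustration, "register uintptr_t gp asm("gp");").
// Both ABI names and architectural xN names are accepted, and the register
// must be a reserved one.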
3756 Register
3757 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
3758                                        const MachineFunction &MF) const {
3759   Register Reg = MatchRegisterAltName(RegName);
3760   if (Reg == RISCV::NoRegister)
3761     Reg = MatchRegisterName(RegName);
3762   if (Reg == RISCV::NoRegister)
3763     report_fatal_error(
3764         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
3765   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
3766   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
3767     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
3768                              StringRef(RegName) + "\"."));
3769   return Reg;
3770 }
3771 
3772 namespace llvm {
3773 namespace RISCVVIntrinsicsTable {
3774 
3775 #define GET_RISCVVIntrinsicsTable_IMPL
3776 #include "RISCVGenSearchableTables.inc"
3777 
3778 } // namespace RISCVVIntrinsicsTable
3779 } // namespace llvm
3780