//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "RISCV.h"
16 #include "RISCVMachineFunctionInfo.h"
17 #include "RISCVRegisterInfo.h"
18 #include "RISCVSubtarget.h"
19 #include "RISCVTargetMachine.h"
20 #include "Utils/RISCVMatInt.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/SelectionDAGISel.h"
29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
30 #include "llvm/CodeGen/ValueTypes.h"
31 #include "llvm/IR/DiagnosticInfo.h"
32 #include "llvm/IR/DiagnosticPrinter.h"
33 #include "llvm/IR/IntrinsicsRISCV.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/raw_ostream.h"
37 
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "riscv-lower"
41 
42 STATISTIC(NumTailCalls, "Number of tail calls");
43 
44 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
45                                          const RISCVSubtarget &STI)
46     : TargetLowering(TM), Subtarget(STI) {
47 
48   if (Subtarget.isRV32E())
49     report_fatal_error("Codegen not yet implemented for RV32E");
50 
51   RISCVABI::ABI ABI = Subtarget.getTargetABI();
52   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
53 
54   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
55       !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
59     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
60   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
61              !Subtarget.hasStdExtD()) {
62     errs() << "Hard-float 'd' ABI can't be used for a target that "
63               "doesn't support the D instruction set extension (ignoring "
64               "target-abi)\n";
65     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
66   }
67 
68   switch (ABI) {
69   default:
70     report_fatal_error("Don't know how to lower this ABI");
71   case RISCVABI::ABI_ILP32:
72   case RISCVABI::ABI_ILP32F:
73   case RISCVABI::ABI_ILP32D:
74   case RISCVABI::ABI_LP64:
75   case RISCVABI::ABI_LP64F:
76   case RISCVABI::ABI_LP64D:
77     break;
78   }
79 
80   MVT XLenVT = Subtarget.getXLenVT();
81 
82   // Set up the register classes.
83   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
84 
85   if (Subtarget.hasStdExtF())
86     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
87   if (Subtarget.hasStdExtD())
88     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
89 
90   // Compute derived properties from the register classes.
91   computeRegisterProperties(STI.getRegisterInfo());
92 
93   setStackPointerRegisterToSaveRestore(RISCV::X2);
94 
95   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
96     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
97 
98   // TODO: add all necessary setOperationAction calls.
99   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
100 
101   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
102   setOperationAction(ISD::BR_CC, XLenVT, Expand);
103   setOperationAction(ISD::SELECT, XLenVT, Custom);
104   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
105 
106   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
107   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
108 
109   setOperationAction(ISD::VASTART, MVT::Other, Custom);
110   setOperationAction(ISD::VAARG, MVT::Other, Expand);
111   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
112   setOperationAction(ISD::VAEND, MVT::Other, Expand);
113 
114   for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
115     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
116 
117   if (Subtarget.is64Bit()) {
118     setOperationAction(ISD::ADD, MVT::i32, Custom);
119     setOperationAction(ISD::SUB, MVT::i32, Custom);
120     setOperationAction(ISD::SHL, MVT::i32, Custom);
121     setOperationAction(ISD::SRA, MVT::i32, Custom);
122     setOperationAction(ISD::SRL, MVT::i32, Custom);
123   }
124 
125   if (!Subtarget.hasStdExtM()) {
126     setOperationAction(ISD::MUL, XLenVT, Expand);
127     setOperationAction(ISD::MULHS, XLenVT, Expand);
128     setOperationAction(ISD::MULHU, XLenVT, Expand);
129     setOperationAction(ISD::SDIV, XLenVT, Expand);
130     setOperationAction(ISD::UDIV, XLenVT, Expand);
131     setOperationAction(ISD::SREM, XLenVT, Expand);
132     setOperationAction(ISD::UREM, XLenVT, Expand);
133   }
134 
135   if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
136     setOperationAction(ISD::MUL, MVT::i32, Custom);
137     setOperationAction(ISD::SDIV, MVT::i32, Custom);
138     setOperationAction(ISD::UDIV, MVT::i32, Custom);
139     setOperationAction(ISD::UREM, MVT::i32, Custom);
140   }
141 
142   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
143   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
144   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
145   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
146 
147   setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
148   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
149   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
150 
151   setOperationAction(ISD::ROTL, XLenVT, Expand);
152   setOperationAction(ISD::ROTR, XLenVT, Expand);
153   setOperationAction(ISD::BSWAP, XLenVT, Expand);
154   setOperationAction(ISD::CTTZ, XLenVT, Expand);
155   setOperationAction(ISD::CTLZ, XLenVT, Expand);
156   setOperationAction(ISD::CTPOP, XLenVT, Expand);
157 
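  // Floating-point condition codes and operations with no direct hardware
  // equivalent; when the relevant FP extension is enabled below, the condition
  // codes are expanded into supported comparisons and the operations into
  // library calls.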
158   ISD::CondCode FPCCToExtend[] = {
159       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
160       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
161       ISD::SETGE,  ISD::SETNE};
162 
163   ISD::NodeType FPOpToExtend[] = {
164       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
165       ISD::FP_TO_FP16};
166 
167   if (Subtarget.hasStdExtF()) {
168     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
169     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
170     for (auto CC : FPCCToExtend)
171       setCondCodeAction(CC, MVT::f32, Expand);
172     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
173     setOperationAction(ISD::SELECT, MVT::f32, Custom);
174     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
175     for (auto Op : FPOpToExtend)
176       setOperationAction(Op, MVT::f32, Expand);
177     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
178     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
179   }
180 
181   if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
182     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
183 
184   if (Subtarget.hasStdExtD()) {
185     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
186     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
187     for (auto CC : FPCCToExtend)
188       setCondCodeAction(CC, MVT::f64, Expand);
189     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
190     setOperationAction(ISD::SELECT, MVT::f64, Custom);
191     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
192     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
193     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
194     for (auto Op : FPOpToExtend)
195       setOperationAction(Op, MVT::f64, Expand);
196     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
197     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
198   }
199 
200   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
201   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
202   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
203 
204   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
205 
206   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
207   // Unfortunately this can't be determined just from the ISA naming string.
208   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
209                      Subtarget.is64Bit() ? Legal : Custom);
210 
211   setOperationAction(ISD::TRAP, MVT::Other, Legal);
212   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
213 
214   if (Subtarget.hasStdExtA()) {
215     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
216     setMinCmpXchgSizeInBits(32);
217   } else {
218     setMaxAtomicSizeInBitsSupported(0);
219   }
220 
221   setBooleanContents(ZeroOrOneBooleanContent);
222 
223   // Function alignments.
224   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
225   setMinFunctionAlignment(FunctionAlignment);
226   setPrefFunctionAlignment(FunctionAlignment);
227 
228   // Effectively disable jump table generation.
229   setMinimumJumpTableEntries(INT_MAX);
230 }
231 
232 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
233                                             EVT VT) const {
234   if (!VT.isVector())
235     return getPointerTy(DL);
236   return VT.changeVectorElementTypeToInteger();
237 }
238 
239 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
240                                              const CallInst &I,
241                                              MachineFunction &MF,
242                                              unsigned Intrinsic) const {
243   switch (Intrinsic) {
244   default:
245     return false;
246   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
247   case Intrinsic::riscv_masked_atomicrmw_add_i32:
248   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
249   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
250   case Intrinsic::riscv_masked_atomicrmw_max_i32:
251   case Intrinsic::riscv_masked_atomicrmw_min_i32:
252   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
253   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32: {
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
  }
265 }
266 
267 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
268                                                 const AddrMode &AM, Type *Ty,
269                                                 unsigned AS,
270                                                 Instruction *I) const {
271   // No global is ever allowed as a base.
272   if (AM.BaseGV)
273     return false;
274 
275   // Require a 12-bit signed offset.
276   if (!isInt<12>(AM.BaseOffs))
277     return false;
278 
279   switch (AM.Scale) {
280   case 0: // "r+i" or just "i", depending on HasBaseReg.
281     break;
282   case 1:
283     if (!AM.HasBaseReg) // allow "r+i".
284       break;
285     return false; // disallow "r+r" or "r+r+i".
286   default:
287     return false;
288   }
289 
290   return true;
291 }
292 
293 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
294   return isInt<12>(Imm);
295 }
296 
297 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
298   return isInt<12>(Imm);
299 }
300 
301 // On RV32, 64-bit integers are split into their high and low parts and held
302 // in two different registers, so the trunc is free since the low register can
303 // just be used.
304 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
305   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
306     return false;
307   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
308   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
309   return (SrcBits == 64 && DestBits == 32);
310 }
311 
312 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
313   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
314       !SrcVT.isInteger() || !DstVT.isInteger())
315     return false;
316   unsigned SrcBits = SrcVT.getSizeInBits();
317   unsigned DestBits = DstVT.getSizeInBits();
318   return (SrcBits == 64 && DestBits == 32);
319 }
320 
321 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
322   // Zexts are free if they can be combined with a load.
323   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
324     EVT MemVT = LD->getMemoryVT();
325     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
326          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
327         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
328          LD->getExtensionType() == ISD::ZEXTLOAD))
329       return true;
330   }
331 
332   return TargetLowering::isZExtFree(Val, VT2);
333 }
334 
335 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
336   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
337 }
338 
339 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
340                                        bool ForCodeSize) const {
341   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
342     return false;
343   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
344     return false;
345   if (Imm.isNegZero())
346     return false;
347   return Imm.isZero();
348 }
349 
350 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
351   return (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
352          (VT == MVT::f64 && Subtarget.hasStdExtD());
353 }
354 
355 // Changes the condition code and swaps operands if necessary, so the SetCC
356 // operation matches one of the comparisons supported directly in the RISC-V
357 // ISA.
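// For example, (setcc a, b, setgt) is rewritten as (setcc b, a, setlt); only
// EQ, NE, LT, GE, LTU and GEU map directly onto the BEQ/BNE/BLT/BGE/BLTU/BGEU
// branch instructions (see getBranchOpcodeForIntCondCode below).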
358 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
359   switch (CC) {
360   default:
361     break;
362   case ISD::SETGT:
363   case ISD::SETLE:
364   case ISD::SETUGT:
365   case ISD::SETULE:
366     CC = ISD::getSetCCSwappedOperands(CC);
367     std::swap(LHS, RHS);
368     break;
369   }
370 }
371 
372 // Return the RISC-V branch opcode that matches the given DAG integer
373 // condition code. The CondCode must be one of those supported by the RISC-V
374 // ISA (see normaliseSetCC).
375 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
376   switch (CC) {
377   default:
378     llvm_unreachable("Unsupported CondCode");
379   case ISD::SETEQ:
380     return RISCV::BEQ;
381   case ISD::SETNE:
382     return RISCV::BNE;
383   case ISD::SETLT:
384     return RISCV::BLT;
385   case ISD::SETGE:
386     return RISCV::BGE;
387   case ISD::SETULT:
388     return RISCV::BLTU;
389   case ISD::SETUGE:
390     return RISCV::BGEU;
391   }
392 }
393 
394 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
395                                             SelectionDAG &DAG) const {
396   switch (Op.getOpcode()) {
397   default:
398     report_fatal_error("unimplemented operand");
399   case ISD::GlobalAddress:
400     return lowerGlobalAddress(Op, DAG);
401   case ISD::BlockAddress:
402     return lowerBlockAddress(Op, DAG);
403   case ISD::ConstantPool:
404     return lowerConstantPool(Op, DAG);
405   case ISD::GlobalTLSAddress:
406     return lowerGlobalTLSAddress(Op, DAG);
407   case ISD::SELECT:
408     return lowerSELECT(Op, DAG);
409   case ISD::VASTART:
410     return lowerVASTART(Op, DAG);
411   case ISD::FRAMEADDR:
412     return lowerFRAMEADDR(Op, DAG);
413   case ISD::RETURNADDR:
414     return lowerRETURNADDR(Op, DAG);
415   case ISD::SHL_PARTS:
416     return lowerShiftLeftParts(Op, DAG);
417   case ISD::SRA_PARTS:
418     return lowerShiftRightParts(Op, DAG, true);
419   case ISD::SRL_PARTS:
420     return lowerShiftRightParts(Op, DAG, false);
421   case ISD::BITCAST: {
422     assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
423            "Unexpected custom legalisation");
424     SDLoc DL(Op);
425     SDValue Op0 = Op.getOperand(0);
426     if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
427       return SDValue();
428     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
429     SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
430     return FPConv;
431   }
432   }
433 }
434 
435 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
436                              SelectionDAG &DAG, unsigned Flags) {
437   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
438 }
439 
440 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
441                              SelectionDAG &DAG, unsigned Flags) {
442   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
443                                    Flags);
444 }
445 
446 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
447                              SelectionDAG &DAG, unsigned Flags) {
448   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
449                                    N->getOffset(), Flags);
450 }
451 
452 template <class NodeTy>
453 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
454                                      bool IsLocal) const {
455   SDLoc DL(N);
456   EVT Ty = getPointerTy(DAG.getDataLayout());
457 
458   if (isPositionIndependent()) {
459     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
460     if (IsLocal)
461       // Use PC-relative addressing to access the symbol. This generates the
462       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
463       // %pcrel_lo(auipc)).
464       return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
465 
466     // Use PC-relative addressing to access the GOT for this symbol, then load
467     // the address from the GOT. This generates the pattern (PseudoLA sym),
468     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
469     return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
470   }
471 
472   switch (getTargetMachine().getCodeModel()) {
473   default:
474     report_fatal_error("Unsupported code model for lowering");
475   case CodeModel::Small: {
476     // Generate a sequence for accessing addresses within the first 2 GiB of
477     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
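    // For a symbol sym this typically becomes (register choice is
    // illustrative):
    //   lui  a0, %hi(sym)
    //   addi a0, a0, %lo(sym)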
478     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
479     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
480     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
481     return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
482   }
483   case CodeModel::Medium: {
484     // Generate a sequence for accessing addresses within any 2GiB range within
485     // the address space. This generates the pattern (PseudoLLA sym), which
486     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
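    // For example (register and label names are illustrative):
    //   .Lpcrel_hi0: auipc a0, %pcrel_hi(sym)
    //                addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)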
487     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
488     return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
489   }
490   }
491 }
492 
493 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
494                                                 SelectionDAG &DAG) const {
495   SDLoc DL(Op);
496   EVT Ty = Op.getValueType();
497   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
498   int64_t Offset = N->getOffset();
499   MVT XLenVT = Subtarget.getXLenVT();
500 
501   const GlobalValue *GV = N->getGlobal();
502   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
503   SDValue Addr = getAddr(N, DAG, IsLocal);
504 
505   // In order to maximise the opportunity for common subexpression elimination,
506   // emit a separate ADD node for the global address offset instead of folding
507   // it in the global address node. Later peephole optimisations may choose to
508   // fold it back in when profitable.
509   if (Offset != 0)
510     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
511                        DAG.getConstant(Offset, DL, XLenVT));
512   return Addr;
513 }
514 
515 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
516                                                SelectionDAG &DAG) const {
517   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
518 
519   return getAddr(N, DAG);
520 }
521 
522 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
523                                                SelectionDAG &DAG) const {
524   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
525 
526   return getAddr(N, DAG);
527 }
528 
529 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
530                                               SelectionDAG &DAG,
531                                               bool UseGOT) const {
532   SDLoc DL(N);
533   EVT Ty = getPointerTy(DAG.getDataLayout());
534   const GlobalValue *GV = N->getGlobal();
535   MVT XLenVT = Subtarget.getXLenVT();
536 
537   if (UseGOT) {
538     // Use PC-relative addressing to access the GOT for this TLS symbol, then
539     // load the address from the GOT and add the thread pointer. This generates
540     // the pattern (PseudoLA_TLS_IE sym), which expands to
541     // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
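    // On RV64 this typically becomes (register and label names are
    // illustrative):
    //   .Lpcrel_hi0: auipc a0, %tls_ie_pcrel_hi(sym)
    //                ld    a0, %pcrel_lo(.Lpcrel_hi0)(a0)
    //   add a0, a0, tp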
542     SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
543     SDValue Load =
544         SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
545 
546     // Add the thread pointer.
547     SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
548     return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
549   }
550 
551   // Generate a sequence for accessing the address relative to the thread
552   // pointer, with the appropriate adjustment for the thread pointer offset.
553   // This generates the pattern
554   // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
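  // which typically becomes (register choice is illustrative):
  //   lui  a0, %tprel_hi(sym)
  //   add  a0, a0, tp, %tprel_add(sym)
  //   addi a0, a0, %tprel_lo(sym)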
555   SDValue AddrHi =
556       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
557   SDValue AddrAdd =
558       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
559   SDValue AddrLo =
560       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
561 
562   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
563   SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
564   SDValue MNAdd = SDValue(
565       DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
566       0);
567   return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
568 }
569 
570 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
571                                                SelectionDAG &DAG) const {
572   SDLoc DL(N);
573   EVT Ty = getPointerTy(DAG.getDataLayout());
574   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
575   const GlobalValue *GV = N->getGlobal();
576 
577   // Use a PC-relative addressing mode to access the global dynamic GOT address.
578   // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
579   // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
580   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
581   SDValue Load =
582       SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
583 
  // Prepare the argument list for the call.
585   ArgListTy Args;
586   ArgListEntry Entry;
587   Entry.Node = Load;
588   Entry.Ty = CallTy;
589   Args.push_back(Entry);
590 
  // Set up a call to __tls_get_addr.
592   TargetLowering::CallLoweringInfo CLI(DAG);
593   CLI.setDebugLoc(DL)
594       .setChain(DAG.getEntryNode())
595       .setLibCallee(CallingConv::C, CallTy,
596                     DAG.getExternalSymbol("__tls_get_addr", Ty),
597                     std::move(Args));
598 
599   return LowerCallTo(CLI).first;
600 }
601 
602 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
603                                                    SelectionDAG &DAG) const {
604   SDLoc DL(Op);
605   EVT Ty = Op.getValueType();
606   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
607   int64_t Offset = N->getOffset();
608   MVT XLenVT = Subtarget.getXLenVT();
609 
610   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
611 
612   SDValue Addr;
613   switch (Model) {
614   case TLSModel::LocalExec:
615     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
616     break;
617   case TLSModel::InitialExec:
618     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
619     break;
620   case TLSModel::LocalDynamic:
621   case TLSModel::GeneralDynamic:
622     Addr = getDynamicTLSAddr(N, DAG);
623     break;
624   }
625 
626   // In order to maximise the opportunity for common subexpression elimination,
627   // emit a separate ADD node for the global address offset instead of folding
628   // it in the global address node. Later peephole optimisations may choose to
629   // fold it back in when profitable.
630   if (Offset != 0)
631     return DAG.getNode(ISD::ADD, DL, Ty, Addr,
632                        DAG.getConstant(Offset, DL, XLenVT));
633   return Addr;
634 }
635 
636 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
637   SDValue CondV = Op.getOperand(0);
638   SDValue TrueV = Op.getOperand(1);
639   SDValue FalseV = Op.getOperand(2);
640   SDLoc DL(Op);
641   MVT XLenVT = Subtarget.getXLenVT();
642 
643   // If the result type is XLenVT and CondV is the output of a SETCC node
644   // which also operated on XLenVT inputs, then merge the SETCC node into the
645   // lowered RISCVISD::SELECT_CC to take advantage of the integer
646   // compare+branch instructions. i.e.:
647   // (select (setcc lhs, rhs, cc), truev, falsev)
648   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
649   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
650       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
651     SDValue LHS = CondV.getOperand(0);
652     SDValue RHS = CondV.getOperand(1);
653     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
654     ISD::CondCode CCVal = CC->get();
655 
656     normaliseSetCC(LHS, RHS, CCVal);
657 
658     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
659     SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
660     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
661     return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
662   }
663 
664   // Otherwise:
665   // (select condv, truev, falsev)
666   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
667   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
668   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
669 
670   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
671   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
672 
673   return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
674 }
675 
676 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
677   MachineFunction &MF = DAG.getMachineFunction();
678   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
679 
680   SDLoc DL(Op);
681   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
682                                  getPointerTy(MF.getDataLayout()));
683 
684   // vastart just stores the address of the VarArgsFrameIndex slot into the
685   // memory location argument.
686   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
687   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
688                       MachinePointerInfo(SV));
689 }
690 
691 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
692                                             SelectionDAG &DAG) const {
693   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
694   MachineFunction &MF = DAG.getMachineFunction();
695   MachineFrameInfo &MFI = MF.getFrameInfo();
696   MFI.setFrameAddressIsTaken(true);
697   Register FrameReg = RI.getFrameRegister(MF);
698   int XLenInBytes = Subtarget.getXLen() / 8;
699 
700   EVT VT = Op.getValueType();
701   SDLoc DL(Op);
702   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
703   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
704   while (Depth--) {
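    // The caller's frame pointer is saved 2 * XLenInBytes below the current
    // frame pointer, with the return address in the slot above it (see
    // lowerRETURNADDR). Walk up one frame by loading that saved value.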
705     int Offset = -(XLenInBytes * 2);
706     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
707                               DAG.getIntPtrConstant(Offset, DL));
708     FrameAddr =
709         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
710   }
711   return FrameAddr;
712 }
713 
714 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
715                                              SelectionDAG &DAG) const {
716   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
717   MachineFunction &MF = DAG.getMachineFunction();
718   MachineFrameInfo &MFI = MF.getFrameInfo();
719   MFI.setReturnAddressIsTaken(true);
720   MVT XLenVT = Subtarget.getXLenVT();
721   int XLenInBytes = Subtarget.getXLen() / 8;
722 
723   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
724     return SDValue();
725 
726   EVT VT = Op.getValueType();
727   SDLoc DL(Op);
728   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
729   if (Depth) {
730     int Off = -XLenInBytes;
731     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
732     SDValue Offset = DAG.getConstant(Off, DL, VT);
733     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
734                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
735                        MachinePointerInfo());
736   }
737 
738   // Return the value of the return address register, marking it an implicit
739   // live-in.
740   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
741   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
742 }
743 
744 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
745                                                  SelectionDAG &DAG) const {
746   SDLoc DL(Op);
747   SDValue Lo = Op.getOperand(0);
748   SDValue Hi = Op.getOperand(1);
749   SDValue Shamt = Op.getOperand(2);
750   EVT VT = Lo.getValueType();
751 
752   // if Shamt-XLEN < 0: // Shamt < XLEN
753   //   Lo = Lo << Shamt
754   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
755   // else:
  //   Hi = Lo << (Shamt-XLEN)
  //   Lo = 0
758 
759   SDValue Zero = DAG.getConstant(0, DL, VT);
760   SDValue One = DAG.getConstant(1, DL, VT);
761   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
762   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
763   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
764   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
765 
766   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
767   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
768   SDValue ShiftRightLo =
769       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
770   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
771   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
772   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
773 
774   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
775 
776   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
777   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
778 
779   SDValue Parts[2] = {Lo, Hi};
780   return DAG.getMergeValues(Parts, DL);
781 }
782 
783 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
784                                                   bool IsSRA) const {
785   SDLoc DL(Op);
786   SDValue Lo = Op.getOperand(0);
787   SDValue Hi = Op.getOperand(1);
788   SDValue Shamt = Op.getOperand(2);
789   EVT VT = Lo.getValueType();
790 
791   // SRA expansion:
792   //   if Shamt-XLEN < 0: // Shamt < XLEN
793   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
794   //     Hi = Hi >>s Shamt
795   //   else:
796   //     Lo = Hi >>s (Shamt-XLEN);
797   //     Hi = Hi >>s (XLEN-1)
798   //
799   // SRL expansion:
800   //   if Shamt-XLEN < 0: // Shamt < XLEN
801   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
802   //     Hi = Hi >>u Shamt
803   //   else:
804   //     Lo = Hi >>u (Shamt-XLEN);
805   //     Hi = 0;
806 
807   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
808 
809   SDValue Zero = DAG.getConstant(0, DL, VT);
810   SDValue One = DAG.getConstant(1, DL, VT);
811   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
812   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
813   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
814   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
815 
816   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
817   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
818   SDValue ShiftLeftHi =
819       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
820   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
821   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
822   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
823   SDValue HiFalse =
824       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
825 
826   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
827 
828   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
829   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
830 
831   SDValue Parts[2] = {Lo, Hi};
832   return DAG.getMergeValues(Parts, DL);
833 }
834 
835 // Returns the opcode of the target-specific SDNode that implements the 32-bit
836 // form of the given Opcode.
837 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
838   switch (Opcode) {
839   default:
840     llvm_unreachable("Unexpected opcode");
841   case ISD::SHL:
842     return RISCVISD::SLLW;
843   case ISD::SRA:
844     return RISCVISD::SRAW;
845   case ISD::SRL:
846     return RISCVISD::SRLW;
847   case ISD::SDIV:
848     return RISCVISD::DIVW;
849   case ISD::UDIV:
850     return RISCVISD::DIVUW;
851   case ISD::UREM:
852     return RISCVISD::REMUW;
853   }
854 }
855 
856 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
857 // Because i32 isn't a legal type for RV64, these operations would otherwise
858 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// instructions later, because the fact that the operation was originally of
// type i32 is lost.
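// For example, on RV64 an i32 SRL becomes:
//   (i32 (trunc (riscvisd::srlw (i64 (anyext x)), (i64 (anyext y)))))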
861 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
862   SDLoc DL(N);
863   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
864   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
865   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
866   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
867   // ReplaceNodeResults requires we maintain the same type for the return value.
868   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
869 }
870 
// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics, in order to reduce the number of sign-extension instructions.
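// For example, an i32 ADD becomes:
//   (i32 (trunc (sign_extend_inreg (i64 (add (anyext x), (anyext y))), i32)))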
873 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
874   SDLoc DL(N);
875   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
876   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
877   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
878   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
879                                DAG.getValueType(MVT::i32));
880   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
881 }
882 
883 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
884                                              SmallVectorImpl<SDValue> &Results,
885                                              SelectionDAG &DAG) const {
886   SDLoc DL(N);
887   switch (N->getOpcode()) {
888   default:
889     llvm_unreachable("Don't know how to custom type legalize this operation!");
890   case ISD::READCYCLECOUNTER: {
891     assert(!Subtarget.is64Bit() &&
892            "READCYCLECOUNTER only has custom type legalization on riscv32");
893 
894     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
895     SDValue RCW =
896         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
897 
898     Results.push_back(
899         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
900     Results.push_back(RCW.getValue(2));
901     break;
902   }
903   case ISD::ADD:
904   case ISD::SUB:
905   case ISD::MUL:
906     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
907            "Unexpected custom legalisation");
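    // Leave operations with a constant RHS to the default type legalisation,
    // which handles them well (for example by selecting the immediate form of
    // the instruction).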
908     if (N->getOperand(1).getOpcode() == ISD::Constant)
909       return;
910     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
911     break;
912   case ISD::SHL:
913   case ISD::SRA:
914   case ISD::SRL:
915     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
916            "Unexpected custom legalisation");
917     if (N->getOperand(1).getOpcode() == ISD::Constant)
918       return;
919     Results.push_back(customLegalizeToWOp(N, DAG));
920     break;
921   case ISD::SDIV:
922   case ISD::UDIV:
923   case ISD::UREM:
924     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
925            Subtarget.hasStdExtM() && "Unexpected custom legalisation");
926     if (N->getOperand(0).getOpcode() == ISD::Constant ||
927         N->getOperand(1).getOpcode() == ISD::Constant)
928       return;
929     Results.push_back(customLegalizeToWOp(N, DAG));
930     break;
931   case ISD::BITCAST: {
932     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
933            Subtarget.hasStdExtF() && "Unexpected custom legalisation");
934     SDLoc DL(N);
935     SDValue Op0 = N->getOperand(0);
936     if (Op0.getValueType() != MVT::f32)
937       return;
938     SDValue FPConv =
939         DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
940     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
941     break;
942   }
943   }
944 }
945 
946 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
947                                                DAGCombinerInfo &DCI) const {
948   SelectionDAG &DAG = DCI.DAG;
949 
950   switch (N->getOpcode()) {
951   default:
952     break;
953   case RISCVISD::SplitF64: {
954     SDValue Op0 = N->getOperand(0);
955     // If the input to SplitF64 is just BuildPairF64 then the operation is
956     // redundant. Instead, use BuildPairF64's operands directly.
957     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
958       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
959 
960     SDLoc DL(N);
961 
962     // It's cheaper to materialise two 32-bit integers than to load a double
963     // from the constant pool and transfer it to integer registers through the
964     // stack.
965     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
966       APInt V = C->getValueAPF().bitcastToAPInt();
967       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
968       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
969       return DCI.CombineTo(N, Lo, Hi);
970     }
971 
972     // This is a target-specific version of a DAGCombine performed in
973     // DAGCombiner::visitBITCAST. It performs the equivalent of:
974     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
975     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
976     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
977         !Op0.getNode()->hasOneUse())
978       break;
979     SDValue NewSplitF64 =
980         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
981                     Op0.getOperand(0));
982     SDValue Lo = NewSplitF64.getValue(0);
983     SDValue Hi = NewSplitF64.getValue(1);
984     APInt SignBit = APInt::getSignMask(32);
985     if (Op0.getOpcode() == ISD::FNEG) {
986       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
987                                   DAG.getConstant(SignBit, DL, MVT::i32));
988       return DCI.CombineTo(N, Lo, NewHi);
989     }
990     assert(Op0.getOpcode() == ISD::FABS);
991     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
992                                 DAG.getConstant(~SignBit, DL, MVT::i32));
993     return DCI.CombineTo(N, Lo, NewHi);
994   }
995   case RISCVISD::SLLW:
996   case RISCVISD::SRAW:
997   case RISCVISD::SRLW: {
998     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
999     SDValue LHS = N->getOperand(0);
1000     SDValue RHS = N->getOperand(1);
1001     APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
1002     APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
1003     if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
1004         (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
1005       return SDValue();
1006     break;
1007   }
1008   case RISCVISD::FMV_X_ANYEXTW_RV64: {
1009     SDLoc DL(N);
1010     SDValue Op0 = N->getOperand(0);
1011     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
1012     // conversion is unnecessary and can be replaced with an ANY_EXTEND
1013     // of the FMV_W_X_RV64 operand.
1014     if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
1015       SDValue AExtOp =
1016           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
1017       return DCI.CombineTo(N, AExtOp);
1018     }
1019 
1020     // This is a target-specific version of a DAGCombine performed in
1021     // DAGCombiner::visitBITCAST. It performs the equivalent of:
1022     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
1023     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
1024     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
1025         !Op0.getNode()->hasOneUse())
1026       break;
1027     SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
1028                                  Op0.getOperand(0));
1029     APInt SignBit = APInt::getSignMask(32).sext(64);
1030     if (Op0.getOpcode() == ISD::FNEG) {
1031       return DCI.CombineTo(N,
1032                            DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
1033                                        DAG.getConstant(SignBit, DL, MVT::i64)));
1034     }
1035     assert(Op0.getOpcode() == ISD::FABS);
1036     return DCI.CombineTo(N,
1037                          DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
1038                                      DAG.getConstant(~SignBit, DL, MVT::i64)));
1039   }
1040   }
1041 
1042   return SDValue();
1043 }
1044 
1045 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
1046     const SDNode *N, CombineLevel Level) const {
1047   // The following folds are only desirable if `(OP _, c1 << c2)` can be
1048   // materialised in fewer instructions than `(OP _, c1)`:
1049   //
1050   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1051   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
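  // For example, with c1 = 1 and c2 = 4 the shifted constant (16) still fits
  // in an ADDI immediate, so the fold goes ahead; with c1 = 2047 and c2 = 16
  // the shifted constant no longer fits but c1 itself does, so the fold is
  // blocked.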
1052   SDValue N0 = N->getOperand(0);
1053   EVT Ty = N0.getValueType();
1054   if (Ty.isScalarInteger() &&
1055       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
1056     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
1057     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
1058     if (C1 && C2) {
1059       APInt C1Int = C1->getAPIntValue();
1060       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
1061 
1062       // We can materialise `c1 << c2` into an add immediate, so it's "free",
1063       // and the combine should happen, to potentially allow further combines
1064       // later.
1065       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
1066           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
1067         return true;
1068 
1069       // We can materialise `c1` in an add immediate, so it's "free", and the
1070       // combine should be prevented.
1071       if (C1Int.getMinSignedBits() <= 64 &&
1072           isLegalAddImmediate(C1Int.getSExtValue()))
1073         return false;
1074 
1075       // Neither constant will fit into an immediate, so find materialisation
1076       // costs.
1077       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
1078                                               Subtarget.is64Bit());
1079       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
1080           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
1081 
1082       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
1083       // combine should be prevented.
1084       if (C1Cost < ShiftedC1Cost)
1085         return false;
1086     }
1087   }
1088   return true;
1089 }
1090 
1091 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
1092     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1093     unsigned Depth) const {
1094   switch (Op.getOpcode()) {
1095   default:
1096     break;
1097   case RISCVISD::SLLW:
1098   case RISCVISD::SRAW:
1099   case RISCVISD::SRLW:
1100   case RISCVISD::DIVW:
1101   case RISCVISD::DIVUW:
1102   case RISCVISD::REMUW:
1103     // TODO: As the result is sign-extended, this is conservatively correct. A
1104     // more precise answer could be calculated for SRAW depending on known
1105     // bits in the shift amount.
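    // The 32-bit result of these nodes is sign-extended into the 64-bit
    // register, so bits 63..31 are all copies of bit 31: at least 33 sign
    // bits.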
1106     return 33;
1107   }
1108 
1109   return 1;
1110 }
1111 
1112 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
1113                                                   MachineBasicBlock *BB) {
1114   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
1115 
1116   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
1117   // Should the count have wrapped while it was being read, we need to try
1118   // again.
1119   // ...
1120   // read:
1121   // rdcycleh x3 # load high word of cycle
1122   // rdcycle  x2 # load low word of cycle
1123   // rdcycleh x4 # load high word of cycle
1124   // bne x3, x4, read # check if high word reads match, otherwise try again
1125   // ...
1126 
1127   MachineFunction &MF = *BB->getParent();
1128   const BasicBlock *LLVM_BB = BB->getBasicBlock();
1129   MachineFunction::iterator It = ++BB->getIterator();
1130 
1131   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
1132   MF.insert(It, LoopMBB);
1133 
1134   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
1135   MF.insert(It, DoneMBB);
1136 
1137   // Transfer the remainder of BB and its successor edges to DoneMBB.
1138   DoneMBB->splice(DoneMBB->begin(), BB,
1139                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
1140   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
1141 
1142   BB->addSuccessor(LoopMBB);
1143 
1144   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1145   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1146   Register LoReg = MI.getOperand(0).getReg();
1147   Register HiReg = MI.getOperand(1).getReg();
1148   DebugLoc DL = MI.getDebugLoc();
1149 
1150   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
1151   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
1152       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
1153       .addReg(RISCV::X0);
1154   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
1155       .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
1156       .addReg(RISCV::X0);
1157   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
1158       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
1159       .addReg(RISCV::X0);
1160 
1161   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
1162       .addReg(HiReg)
1163       .addReg(ReadAgainReg)
1164       .addMBB(LoopMBB);
1165 
1166   LoopMBB->addSuccessor(LoopMBB);
1167   LoopMBB->addSuccessor(DoneMBB);
1168 
1169   MI.eraseFromParent();
1170 
1171   return DoneMBB;
1172 }
1173 
1174 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
1175                                              MachineBasicBlock *BB) {
1176   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
1177 
1178   MachineFunction &MF = *BB->getParent();
1179   DebugLoc DL = MI.getDebugLoc();
1180   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1181   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
1182   Register LoReg = MI.getOperand(0).getReg();
1183   Register HiReg = MI.getOperand(1).getReg();
1184   Register SrcReg = MI.getOperand(2).getReg();
1185   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
1186   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
1187 
1188   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
1189                           RI);
1190   MachineMemOperand *MMO =
1191       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
1192                               MachineMemOperand::MOLoad, 8, 8);
1193   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
1194       .addFrameIndex(FI)
1195       .addImm(0)
1196       .addMemOperand(MMO);
1197   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
1198       .addFrameIndex(FI)
1199       .addImm(4)
1200       .addMemOperand(MMO);
1201   MI.eraseFromParent(); // The pseudo instruction is gone now.
1202   return BB;
1203 }
1204 
1205 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
1206                                                  MachineBasicBlock *BB) {
1207   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
1208          "Unexpected instruction");
1209 
1210   MachineFunction &MF = *BB->getParent();
1211   DebugLoc DL = MI.getDebugLoc();
1212   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1213   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
1214   Register DstReg = MI.getOperand(0).getReg();
1215   Register LoReg = MI.getOperand(1).getReg();
1216   Register HiReg = MI.getOperand(2).getReg();
1217   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
1218   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
1219 
1220   MachineMemOperand *MMO =
1221       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
1222                               MachineMemOperand::MOStore, 8, 8);
1223   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
1224       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
1225       .addFrameIndex(FI)
1226       .addImm(0)
1227       .addMemOperand(MMO);
1228   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
1229       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
1230       .addFrameIndex(FI)
1231       .addImm(4)
1232       .addMemOperand(MMO);
1233   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
1234   MI.eraseFromParent(); // The pseudo instruction is gone now.
1235   return BB;
1236 }
1237 
1238 static bool isSelectPseudo(MachineInstr &MI) {
1239   switch (MI.getOpcode()) {
1240   default:
1241     return false;
1242   case RISCV::Select_GPR_Using_CC_GPR:
1243   case RISCV::Select_FPR32_Using_CC_GPR:
1244   case RISCV::Select_FPR64_Using_CC_GPR:
1245     return true;
1246   }
1247 }
1248 
1249 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
1250                                            MachineBasicBlock *BB) {
1251   // To "insert" Select_* instructions, we actually have to insert the triangle
1252   // control-flow pattern.  The incoming instructions know the destination vreg
1253   // to set, the condition code register to branch on, the true/false values to
1254   // select between, and the condcode to use to select the appropriate branch.
1255   //
1256   // We produce the following control flow:
1257   //     HeadMBB
1258   //     |  \
1259   //     |  IfFalseMBB
1260   //     | /
1261   //    TailMBB
1262   //
1263   // When we find a sequence of selects we attempt to optimize their emission
1264   // by sharing the control flow. Currently we only handle cases where we have
1265   // multiple selects with the exact same condition (same LHS, RHS and CC).
1266   // The selects may be interleaved with other instructions if the other
1267   // instructions meet some requirements we deem safe:
1268   // - They are debug instructions. Otherwise,
1269   // - They do not have side-effects, do not access memory and their inputs do
1270   //   not depend on the results of the select pseudo-instructions.
1271   // The TrueV/FalseV operands of the selects cannot depend on the result of
1272   // previous selects in the sequence.
1273   // These conditions could be further relaxed. See the X86 target for a
1274   // related approach and more information.
1275   Register LHS = MI.getOperand(1).getReg();
1276   Register RHS = MI.getOperand(2).getReg();
1277   auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
1278 
1279   SmallVector<MachineInstr *, 4> SelectDebugValues;
1280   SmallSet<Register, 4> SelectDests;
1281   SelectDests.insert(MI.getOperand(0).getReg());
1282 
1283   MachineInstr *LastSelectPseudo = &MI;
1284 
1285   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
1286        SequenceMBBI != E; ++SequenceMBBI) {
1287     if (SequenceMBBI->isDebugInstr())
1288       continue;
1289     else if (isSelectPseudo(*SequenceMBBI)) {
1290       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
1291           SequenceMBBI->getOperand(2).getReg() != RHS ||
1292           SequenceMBBI->getOperand(3).getImm() != CC ||
1293           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
1294           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
1295         break;
1296       LastSelectPseudo = &*SequenceMBBI;
1297       SequenceMBBI->collectDebugValues(SelectDebugValues);
1298       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
1299     } else {
1300       if (SequenceMBBI->hasUnmodeledSideEffects() ||
1301           SequenceMBBI->mayLoadOrStore())
1302         break;
1303       if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
1304             return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
1305           }))
1306         break;
1307     }
1308   }
1309 
1310   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
1311   const BasicBlock *LLVM_BB = BB->getBasicBlock();
1312   DebugLoc DL = MI.getDebugLoc();
1313   MachineFunction::iterator I = ++BB->getIterator();
1314 
1315   MachineBasicBlock *HeadMBB = BB;
1316   MachineFunction *F = BB->getParent();
1317   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
1318   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
1319 
1320   F->insert(I, IfFalseMBB);
1321   F->insert(I, TailMBB);
1322 
1323   // Transfer debug instructions associated with the selects to TailMBB.
1324   for (MachineInstr *DebugInstr : SelectDebugValues) {
1325     TailMBB->push_back(DebugInstr->removeFromParent());
1326   }
1327 
1328   // Move all instructions after the sequence to TailMBB.
1329   TailMBB->splice(TailMBB->end(), HeadMBB,
1330                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
1331   // Update machine-CFG edges by transferring all successors of the current
1332   // block to the new block which will contain the Phi nodes for the selects.
1333   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
1334   // Set the successors for HeadMBB.
1335   HeadMBB->addSuccessor(IfFalseMBB);
1336   HeadMBB->addSuccessor(TailMBB);
1337 
1338   // Insert appropriate branch.
1339   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
1340 
1341   BuildMI(HeadMBB, DL, TII.get(Opcode))
1342     .addReg(LHS)
1343     .addReg(RHS)
1344     .addMBB(TailMBB);
1345 
1346   // IfFalseMBB just falls through to TailMBB.
1347   IfFalseMBB->addSuccessor(TailMBB);
1348 
1349   // Create PHIs for all of the select pseudo-instructions.
1350   auto SelectMBBI = MI.getIterator();
1351   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
1352   auto InsertionPoint = TailMBB->begin();
1353   while (SelectMBBI != SelectEnd) {
1354     auto Next = std::next(SelectMBBI);
1355     if (isSelectPseudo(*SelectMBBI)) {
1356       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
1357       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
1358               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
1359           .addReg(SelectMBBI->getOperand(4).getReg())
1360           .addMBB(HeadMBB)
1361           .addReg(SelectMBBI->getOperand(5).getReg())
1362           .addMBB(IfFalseMBB);
1363       SelectMBBI->eraseFromParent();
1364     }
1365     SelectMBBI = Next;
1366   }
1367 
1368   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
1369   return TailMBB;
1370 }
1371 
1372 MachineBasicBlock *
1373 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1374                                                  MachineBasicBlock *BB) const {
1375   switch (MI.getOpcode()) {
1376   default:
1377     llvm_unreachable("Unexpected instr type to insert");
1378   case RISCV::ReadCycleWide:
1379     assert(!Subtarget.is64Bit() &&
1380            "ReadCycleWrite is only to be used on riscv32");
1381     return emitReadCycleWidePseudo(MI, BB);
1382   case RISCV::Select_GPR_Using_CC_GPR:
1383   case RISCV::Select_FPR32_Using_CC_GPR:
1384   case RISCV::Select_FPR64_Using_CC_GPR:
1385     return emitSelectPseudo(MI, BB);
1386   case RISCV::BuildPairF64Pseudo:
1387     return emitBuildPairF64Pseudo(MI, BB);
1388   case RISCV::SplitF64Pseudo:
1389     return emitSplitF64Pseudo(MI, BB);
1390   }
1391 }
1392 
1393 // Calling Convention Implementation.
1394 // The expectations for frontend ABI lowering vary from target to target.
1395 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
1396 // details, but this is a longer term goal. For now, we simply try to keep the
1397 // role of the frontend as simple and well-defined as possible. The rules can
1398 // be summarised as:
1399 // * Never split up large scalar arguments. We handle them here.
1400 // * If a hardfloat calling convention is being used, and the struct may be
1401 // passed in a pair of registers (fp+fp, int+fp), and both registers are
1402 // available, then pass as two separate arguments. If either the GPRs or FPRs
1403 // are exhausted, then pass according to the rule below.
1404 // * If a struct could never be passed in registers or directly in a stack
1405 // slot (as it is larger than 2*XLEN and the floating point rules don't
1406 // apply), then pass it using a pointer with the byval attribute.
// * If a struct is 2*XLEN or smaller, then coerce to either a two-element
// word-sized array or a 2*XLEN scalar (depending on alignment).
1409 // * The frontend can determine whether a struct is returned by reference or
1410 // not based on its size and fields. If it will be returned by reference, the
1411 // frontend must modify the prototype so a pointer with the sret annotation is
1412 // passed as the first argument. This is not necessary for large scalar
1413 // returns.
1414 // * Struct return values and varargs should be coerced to structs containing
1415 // register-size fields in the same situations they would be for fixed
1416 // arguments.
1417 
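// The standard RISC-V argument registers: x10-x17 correspond to the ABI names
// a0-a7, and f10-f17 to fa0-fa7 (viewed as FPR32 or FPR64 depending on the
// enabled floating-point extension).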
1418 static const MCPhysReg ArgGPRs[] = {
1419   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
1420   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
1421 };
1422 static const MCPhysReg ArgFPR32s[] = {
1423   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
1424   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
1425 };
1426 static const MCPhysReg ArgFPR64s[] = {
1427   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
1428   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
1429 };
1430 
1431 // Pass a 2*XLEN argument that has been split into two XLEN values through
1432 // registers or the stack as necessary.
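// For example, an i64 argument on RV32 may end up entirely in GPRs, split
// between the last available GPR and the stack, or entirely on the stack.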
1433 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
1434                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
1435                                 MVT ValVT2, MVT LocVT2,
1436                                 ISD::ArgFlagsTy ArgFlags2) {
1437   unsigned XLenInBytes = XLen / 8;
1438   if (Register Reg = State.AllocateReg(ArgGPRs)) {
1439     // At least one half can be passed via register.
1440     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
1441                                      VA1.getLocVT(), CCValAssign::Full));
1442   } else {
1443     // Both halves must be passed on the stack, with proper alignment.
1444     unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
1445     State.addLoc(
1446         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
1447                             State.AllocateStack(XLenInBytes, StackAlign),
1448                             VA1.getLocVT(), CCValAssign::Full));
1449     State.addLoc(CCValAssign::getMem(
1450         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
1451         CCValAssign::Full));
1452     return false;
1453   }
1454 
1455   if (Register Reg = State.AllocateReg(ArgGPRs)) {
1456     // The second half can also be passed via register.
1457     State.addLoc(
1458         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
1459   } else {
1460     // The second half is passed via the stack, without additional alignment.
1461     State.addLoc(CCValAssign::getMem(
1462         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
1463         CCValAssign::Full));
1464   }
1465 
1466   return false;
1467 }
1468 
1469 // Implements the RISC-V calling convention. Returns true upon failure.
1470 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1471                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
1472                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
1473                      bool IsRet, Type *OrigTy) {
1474   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
1475   assert(XLen == 32 || XLen == 64);
1476   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
1477 
  // Any return value split into more than two values can't be returned
1479   // directly.
1480   if (IsRet && ValNo > 1)
1481     return true;
1482 
  // UseGPRForF32 is true if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F32 argument registers are available.
1485   bool UseGPRForF32 = true;
  // UseGPRForF64 is true if targeting a soft-float ABI or an FLEN=32 ABI, if
  // passing a variadic argument, or if no F64 argument registers are available.
1488   bool UseGPRForF64 = true;
1489 
1490   switch (ABI) {
1491   default:
1492     llvm_unreachable("Unexpected ABI");
1493   case RISCVABI::ABI_ILP32:
1494   case RISCVABI::ABI_LP64:
1495     break;
1496   case RISCVABI::ABI_ILP32F:
1497   case RISCVABI::ABI_LP64F:
1498     UseGPRForF32 = !IsFixed;
1499     break;
1500   case RISCVABI::ABI_ILP32D:
1501   case RISCVABI::ABI_LP64D:
1502     UseGPRForF32 = !IsFixed;
1503     UseGPRForF64 = !IsFixed;
1504     break;
1505   }
1506 
1507   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s))
1508     UseGPRForF32 = true;
1509   if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s))
1510     UseGPRForF64 = true;
1511 
1512   // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local
1513   // variables rather than directly checking against the target ABI.
1514 
1515   if (UseGPRForF32 && ValVT == MVT::f32) {
1516     LocVT = XLenVT;
1517     LocInfo = CCValAssign::BCvt;
1518   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
1519     LocVT = MVT::i64;
1520     LocInfo = CCValAssign::BCvt;
1521   }
1522 
1523   // If this is a variadic argument, the RISC-V calling convention requires
1524   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
1525   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
1526   // be used regardless of whether the original argument was split during
1527   // legalisation or not. The argument will not be passed by registers if the
1528   // original type is larger than 2*XLEN, so the register alignment rule does
1529   // not apply.
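  // For example, a variadic 'double' on RV32 (8-byte size and alignment) is
  // assigned to an even/odd register pair such as a2/a3; if the next free
  // register is odd (e.g. a1), it is skipped.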
1530   unsigned TwoXLenInBytes = (2 * XLen) / 8;
1531   if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
1532       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
1533     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
1534     // Skip 'odd' register if necessary.
1535     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
1536       State.AllocateReg(ArgGPRs);
1537   }
1538 
1539   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
1540   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
1541       State.getPendingArgFlags();
1542 
1543   assert(PendingLocs.size() == PendingArgFlags.size() &&
1544          "PendingLocs and PendingArgFlags out of sync");
1545 
1546   // Handle passing f64 on RV32D with a soft float ABI or when floating point
1547   // registers are exhausted.
1548   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
1549     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
1550            "Can't lower f64 if it is split");
    // Depending on available argument GPRs, f64 may be passed in a pair of
1552     // GPRs, split between a GPR and the stack, or passed completely on the
1553     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
1554     // cases.
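    // For example, if only a7 remains free, the low half is passed in a7 and
    // the high half on the stack; if no GPRs remain, the whole f64 is passed
    // in an 8-byte stack slot.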
1555     Register Reg = State.AllocateReg(ArgGPRs);
1556     LocVT = MVT::i32;
1557     if (!Reg) {
1558       unsigned StackOffset = State.AllocateStack(8, 8);
1559       State.addLoc(
1560           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
1561       return false;
1562     }
1563     if (!State.AllocateReg(ArgGPRs))
1564       State.AllocateStack(4, 4);
1565     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1566     return false;
1567   }
1568 
1569   // Split arguments might be passed indirectly, so keep track of the pending
1570   // values.
1571   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
1572     LocVT = XLenVT;
1573     LocInfo = CCValAssign::Indirect;
1574     PendingLocs.push_back(
1575         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
1576     PendingArgFlags.push_back(ArgFlags);
1577     if (!ArgFlags.isSplitEnd()) {
1578       return false;
1579     }
1580   }
1581 
1582   // If the split argument only had two elements, it should be passed directly
1583   // in registers or on the stack.
1584   if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
1585     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
1586     // Apply the normal calling convention rules to the first half of the
1587     // split argument.
1588     CCValAssign VA = PendingLocs[0];
1589     ISD::ArgFlagsTy AF = PendingArgFlags[0];
1590     PendingLocs.clear();
1591     PendingArgFlags.clear();
1592     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
1593                                ArgFlags);
1594   }
1595 
1596   // Allocate to a register if possible, or else a stack slot.
1597   Register Reg;
1598   if (ValVT == MVT::f32 && !UseGPRForF32)
1599     Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s);
1600   else if (ValVT == MVT::f64 && !UseGPRForF64)
1601     Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s);
1602   else
1603     Reg = State.AllocateReg(ArgGPRs);
1604   unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);
1605 
1606   // If we reach this point and PendingLocs is non-empty, we must be at the
1607   // end of a split argument that must be passed indirectly.
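  // This happens for scalar types larger than 2*XLEN (e.g. i128 or fp128 on
  // RV32): the caller stores the value to the stack and passes its address
  // instead.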
1608   if (!PendingLocs.empty()) {
1609     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
1610     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
1611 
1612     for (auto &It : PendingLocs) {
1613       if (Reg)
1614         It.convertToReg(Reg);
1615       else
1616         It.convertToMem(StackOffset);
1617       State.addLoc(It);
1618     }
1619     PendingLocs.clear();
1620     PendingArgFlags.clear();
1621     return false;
1622   }
1623 
1624   assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) &&
1625          "Expected an XLenVT at this stage");
1626 
1627   if (Reg) {
1628     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1629     return false;
1630   }
1631 
1632   // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
1633   if (ValVT == MVT::f32 || ValVT == MVT::f64) {
1634     LocVT = ValVT;
1635     LocInfo = CCValAssign::Full;
1636   }
1637   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
1638   return false;
1639 }
1640 
1641 void RISCVTargetLowering::analyzeInputArgs(
1642     MachineFunction &MF, CCState &CCInfo,
1643     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
1644   unsigned NumArgs = Ins.size();
1645   FunctionType *FType = MF.getFunction().getFunctionType();
1646 
1647   for (unsigned i = 0; i != NumArgs; ++i) {
1648     MVT ArgVT = Ins[i].VT;
1649     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
1650 
1651     Type *ArgTy = nullptr;
1652     if (IsRet)
1653       ArgTy = FType->getReturnType();
1654     else if (Ins[i].isOrigArg())
1655       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
1656 
1657     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
1658     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
1660       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
1661                         << EVT(ArgVT).getEVTString() << '\n');
1662       llvm_unreachable(nullptr);
1663     }
1664   }
1665 }
1666 
1667 void RISCVTargetLowering::analyzeOutputArgs(
1668     MachineFunction &MF, CCState &CCInfo,
1669     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
1670     CallLoweringInfo *CLI) const {
1671   unsigned NumArgs = Outs.size();
1672 
1673   for (unsigned i = 0; i != NumArgs; i++) {
1674     MVT ArgVT = Outs[i].VT;
1675     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
1676     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
1677 
1678     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
1679     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
1680                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
1681       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
1682                         << EVT(ArgVT).getEVTString() << "\n");
1683       llvm_unreachable(nullptr);
1684     }
1685   }
1686 }
1687 
1688 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
1689 // values.
1690 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
1691                                    const CCValAssign &VA, const SDLoc &DL) {
1692   switch (VA.getLocInfo()) {
1693   default:
1694     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1695   case CCValAssign::Full:
1696     break;
1697   case CCValAssign::BCvt:
1698     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
1699       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
1700       break;
1701     }
1702     Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
1703     break;
1704   }
1705   return Val;
1706 }
1707 
1708 // The caller is responsible for loading the full value if the argument is
1709 // passed with CCValAssign::Indirect.
1710 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
1711                                 const CCValAssign &VA, const SDLoc &DL) {
1712   MachineFunction &MF = DAG.getMachineFunction();
1713   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1714   EVT LocVT = VA.getLocVT();
1715   SDValue Val;
1716   const TargetRegisterClass *RC;
1717 
1718   switch (LocVT.getSimpleVT().SimpleTy) {
1719   default:
1720     llvm_unreachable("Unexpected register type");
1721   case MVT::i32:
1722   case MVT::i64:
1723     RC = &RISCV::GPRRegClass;
1724     break;
1725   case MVT::f32:
1726     RC = &RISCV::FPR32RegClass;
1727     break;
1728   case MVT::f64:
1729     RC = &RISCV::FPR64RegClass;
1730     break;
1731   }
1732 
1733   Register VReg = RegInfo.createVirtualRegister(RC);
1734   RegInfo.addLiveIn(VA.getLocReg(), VReg);
1735   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1736 
1737   if (VA.getLocInfo() == CCValAssign::Indirect)
1738     return Val;
1739 
1740   return convertLocVTToValVT(DAG, Val, VA, DL);
1741 }
1742 
1743 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
1744                                    const CCValAssign &VA, const SDLoc &DL) {
1745   EVT LocVT = VA.getLocVT();
1746 
1747   switch (VA.getLocInfo()) {
1748   default:
1749     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1750   case CCValAssign::Full:
1751     break;
1752   case CCValAssign::BCvt:
1753     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
1754       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
1755       break;
1756     }
1757     Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
1758     break;
1759   }
1760   return Val;
1761 }
1762 
1763 // The caller is responsible for loading the full value if the argument is
1764 // passed with CCValAssign::Indirect.
1765 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
1766                                 const CCValAssign &VA, const SDLoc &DL) {
1767   MachineFunction &MF = DAG.getMachineFunction();
1768   MachineFrameInfo &MFI = MF.getFrameInfo();
1769   EVT LocVT = VA.getLocVT();
1770   EVT ValVT = VA.getValVT();
1771   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
1772   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1773                                  VA.getLocMemOffset(), /*Immutable=*/true);
1774   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1775   SDValue Val;
1776 
1777   ISD::LoadExtType ExtType;
1778   switch (VA.getLocInfo()) {
1779   default:
1780     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1781   case CCValAssign::Full:
1782   case CCValAssign::Indirect:
1783   case CCValAssign::BCvt:
1784     ExtType = ISD::NON_EXTLOAD;
1785     break;
1786   }
1787   Val = DAG.getExtLoad(
1788       ExtType, DL, LocVT, Chain, FIN,
1789       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
1790   return Val;
1791 }
1792 
1793 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
1794                                        const CCValAssign &VA, const SDLoc &DL) {
1795   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
1796          "Unexpected VA");
1797   MachineFunction &MF = DAG.getMachineFunction();
1798   MachineFrameInfo &MFI = MF.getFrameInfo();
1799   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1800 
1801   if (VA.isMemLoc()) {
1802     // f64 is passed on the stack.
1803     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
1804     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1805     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
1806                        MachinePointerInfo::getFixedStack(MF, FI));
1807   }
1808 
1809   assert(VA.isRegLoc() && "Expected register VA assignment");
1810 
1811   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1812   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
1813   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
1814   SDValue Hi;
1815   if (VA.getLocReg() == RISCV::X17) {
1816     // Second half of f64 is passed on the stack.
1817     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
1818     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1819     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
1820                      MachinePointerInfo::getFixedStack(MF, FI));
1821   } else {
1822     // Second half of f64 is passed in another GPR.
1823     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1824     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
1825     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
1826   }
1827   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1828 }
1829 
// FastCC gives less than a 1% performance improvement on some particular
// benchmarks, but it may still be beneficial in other cases.
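// In addition to the normal argument registers, FastCC also allocates from the
// caller-saved temporaries listed below (t2-t6 and ft0-ft11), while avoiding
// t0/t1, which may be needed by the save/restore libcalls.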
1832 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
1833                             CCValAssign::LocInfo LocInfo,
1834                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
1835 
1836   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
1837     // X5 and X6 might be used for save-restore libcall.
1838     static const MCPhysReg GPRList[] = {
1839         RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
1840         RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
1841         RISCV::X29, RISCV::X30, RISCV::X31};
1842     if (unsigned Reg = State.AllocateReg(GPRList)) {
1843       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1844       return false;
1845     }
1846   }
1847 
1848   if (LocVT == MVT::f32) {
1849     static const MCPhysReg FPR32List[] = {
1850         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
1851         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
1852         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
1853         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
1854     if (unsigned Reg = State.AllocateReg(FPR32List)) {
1855       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1856       return false;
1857     }
1858   }
1859 
1860   if (LocVT == MVT::f64) {
1861     static const MCPhysReg FPR64List[] = {
1862         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
1863         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
1864         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
1865         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
1866     if (unsigned Reg = State.AllocateReg(FPR64List)) {
1867       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1868       return false;
1869     }
1870   }
1871 
1872   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
1873     unsigned Offset4 = State.AllocateStack(4, 4);
1874     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
1875     return false;
1876   }
1877 
1878   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
1879     unsigned Offset5 = State.AllocateStack(8, 8);
1880     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
1881     return false;
1882   }
1883 
1884   return true; // CC didn't match.
1885 }
1886 
1887 // Transform physical registers into virtual registers.
1888 SDValue RISCVTargetLowering::LowerFormalArguments(
1889     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1890     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1891     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1892 
1893   switch (CallConv) {
1894   default:
1895     report_fatal_error("Unsupported calling convention");
1896   case CallingConv::C:
1897   case CallingConv::Fast:
1898     break;
1899   }
1900 
1901   MachineFunction &MF = DAG.getMachineFunction();
1902 
1903   const Function &Func = MF.getFunction();
1904   if (Func.hasFnAttribute("interrupt")) {
1905     if (!Func.arg_empty())
1906       report_fatal_error(
1907         "Functions with the interrupt attribute cannot have arguments!");
1908 
1909     StringRef Kind =
1910       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
1911 
1912     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
1913       report_fatal_error(
1914         "Function interrupt attribute argument not supported!");
1915   }
1916 
1917   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1918   MVT XLenVT = Subtarget.getXLenVT();
1919   unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
1921   std::vector<SDValue> OutChains;
1922 
1923   // Assign locations to all of the incoming arguments.
1924   SmallVector<CCValAssign, 16> ArgLocs;
1925   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1926 
1927   if (CallConv == CallingConv::Fast)
1928     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
1929   else
1930     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
1931 
1932   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1933     CCValAssign &VA = ArgLocs[i];
1934     SDValue ArgValue;
1935     // Passing f64 on RV32D with a soft float ABI must be handled as a special
1936     // case.
1937     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
1938       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
1939     else if (VA.isRegLoc())
1940       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
1941     else
1942       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
1943 
1944     if (VA.getLocInfo() == CCValAssign::Indirect) {
1945       // If the original argument was split and passed by reference (e.g. i128
1946       // on RV32), we need to load all parts of it here (using the same
1947       // address).
1948       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1949                                    MachinePointerInfo()));
1950       unsigned ArgIndex = Ins[i].OrigArgIndex;
1951       assert(Ins[i].PartOffset == 0);
1952       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
1953         CCValAssign &PartVA = ArgLocs[i + 1];
1954         unsigned PartOffset = Ins[i + 1].PartOffset;
1955         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1956                                       DAG.getIntPtrConstant(PartOffset, DL));
1957         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1958                                      MachinePointerInfo()));
1959         ++i;
1960       }
1961       continue;
1962     }
1963     InVals.push_back(ArgValue);
1964   }
1965 
1966   if (IsVarArg) {
1967     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
1968     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
1969     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
1970     MachineFrameInfo &MFI = MF.getFrameInfo();
1971     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1972     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1973 
1974     // Offset of the first variable argument from stack pointer, and size of
1975     // the vararg save area. For now, the varargs save area is either zero or
1976     // large enough to hold a0-a7.
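    // For example, if a0-a3 are taken by fixed arguments, then a4-a7 are saved
    // and VarArgsSaveSize is 4*XLenInBytes.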
1977     int VaArgOffset, VarArgsSaveSize;
1978 
1979     // If all registers are allocated, then all varargs must be passed on the
1980     // stack and we don't need to save any argregs.
1981     if (ArgRegs.size() == Idx) {
1982       VaArgOffset = CCInfo.getNextStackOffset();
1983       VarArgsSaveSize = 0;
1984     } else {
1985       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
1986       VaArgOffset = -VarArgsSaveSize;
1987     }
1988 
    // Record the frame index of the first variable argument, which is needed
    // by VASTART.
1991     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
1992     RVFI->setVarArgsFrameIndex(FI);
1993 
1994     // If saving an odd number of registers then create an extra stack slot to
1995     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
1997     if (Idx % 2) {
1998       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
1999       VarArgsSaveSize += XLenInBytes;
2000     }
2001 
2002     // Copy the integer registers that may have been used for passing varargs
2003     // to the vararg save area.
2004     for (unsigned I = Idx; I < ArgRegs.size();
2005          ++I, VaArgOffset += XLenInBytes) {
2006       const Register Reg = RegInfo.createVirtualRegister(RC);
2007       RegInfo.addLiveIn(ArgRegs[I], Reg);
2008       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
2009       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
2010       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2011       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
2012                                    MachinePointerInfo::getFixedStack(MF, FI));
2013       cast<StoreSDNode>(Store.getNode())
2014           ->getMemOperand()
2015           ->setValue((Value *)nullptr);
2016       OutChains.push_back(Store);
2017     }
2018     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
2019   }
2020 
2021   // All stores are grouped in one node to allow the matching between
2022   // the size of Ins and InVals. This only happens for vararg functions.
2023   if (!OutChains.empty()) {
2024     OutChains.push_back(Chain);
2025     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
2026   }
2027 
2028   return Chain;
2029 }
2030 
2031 /// isEligibleForTailCallOptimization - Check whether the call is eligible
2032 /// for tail call optimization.
2033 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
2034 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
2035     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
2036     const SmallVector<CCValAssign, 16> &ArgLocs) const {
2037 
2038   auto &Callee = CLI.Callee;
2039   auto CalleeCC = CLI.CallConv;
2040   auto &Outs = CLI.Outs;
2041   auto &Caller = MF.getFunction();
2042   auto CallerCC = Caller.getCallingConv();
2043 
2044   // Exception-handling functions need a special set of instructions to
2045   // indicate a return to the hardware. Tail-calling another function would
2046   // probably break this.
2047   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
2048   // should be expanded as new function attributes are introduced.
2049   if (Caller.hasFnAttribute("interrupt"))
2050     return false;
2051 
2052   // Do not tail call opt if the stack is used to pass parameters.
2053   if (CCInfo.getNextStackOffset() != 0)
2054     return false;
2055 
2056   // Do not tail call opt if any parameters need to be passed indirectly.
2057   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
2058   // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack. To
  // pass a value indirectly, stack space often needs to be allocated to hold
  // it, so the CCInfo.getNextStackOffset() != 0 check is not sufficient; we
  // also need to check whether any CCValAssign in ArgLocs is marked
  // CCValAssign::Indirect.
2064   for (auto &VA : ArgLocs)
2065     if (VA.getLocInfo() == CCValAssign::Indirect)
2066       return false;
2067 
2068   // Do not tail call opt if either caller or callee uses struct return
2069   // semantics.
2070   auto IsCallerStructRet = Caller.hasStructRetAttr();
2071   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
2072   if (IsCallerStructRet || IsCalleeStructRet)
2073     return false;
2074 
2075   // Externally-defined functions with weak linkage should not be
2076   // tail-called. The behaviour of branch instructions in this situation (as
2077   // used for tail calls) is implementation-defined, so we cannot rely on the
2078   // linker replacing the tail call with a return.
2079   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2080     const GlobalValue *GV = G->getGlobal();
2081     if (GV->hasExternalWeakLinkage())
2082       return false;
2083   }
2084 
2085   // The callee has to preserve all registers the caller needs to preserve.
2086   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
2087   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2088   if (CalleeCC != CallerCC) {
2089     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2090     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2091       return false;
2092   }
2093 
2094   // Byval parameters hand the function a pointer directly into the stack area
2095   // we want to reuse during a tail call. Working around this *is* possible
2096   // but less efficient and uglier in LowerCall.
2097   for (auto &Arg : Outs)
2098     if (Arg.Flags.isByVal())
2099       return false;
2100 
2101   return true;
2102 }
2103 
2104 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
2105 // and output parameter nodes.
2106 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
2107                                        SmallVectorImpl<SDValue> &InVals) const {
2108   SelectionDAG &DAG = CLI.DAG;
2109   SDLoc &DL = CLI.DL;
2110   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2111   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2112   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2113   SDValue Chain = CLI.Chain;
2114   SDValue Callee = CLI.Callee;
2115   bool &IsTailCall = CLI.IsTailCall;
2116   CallingConv::ID CallConv = CLI.CallConv;
2117   bool IsVarArg = CLI.IsVarArg;
2118   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2119   MVT XLenVT = Subtarget.getXLenVT();
2120 
2121   MachineFunction &MF = DAG.getMachineFunction();
2122 
2123   // Analyze the operands of the call, assigning locations to each operand.
2124   SmallVector<CCValAssign, 16> ArgLocs;
2125   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2126 
2127   if (CallConv == CallingConv::Fast)
2128     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
2129   else
2130     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
2131 
2132   // Check if it's really possible to do a tail call.
2133   if (IsTailCall)
2134     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
2135 
2136   if (IsTailCall)
2137     ++NumTailCalls;
2138   else if (CLI.CS && CLI.CS.isMustTailCall())
2139     report_fatal_error("failed to perform tail call elimination on a call "
2140                        "site marked musttail");
2141 
2142   // Get a count of how many bytes are to be pushed on the stack.
2143   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
2144 
2145   // Create local copies for byval args
2146   SmallVector<SDValue, 8> ByValArgs;
2147   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2148     ISD::ArgFlagsTy Flags = Outs[i].Flags;
2149     if (!Flags.isByVal())
2150       continue;
2151 
2152     SDValue Arg = OutVals[i];
2153     unsigned Size = Flags.getByValSize();
2154     Align Alignment = Flags.getNonZeroByValAlign();
2155 
2156     int FI =
2157         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
2158     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2159     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
2160 
2161     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
2162                           /*IsVolatile=*/false,
2163                           /*AlwaysInline=*/false, IsTailCall,
2164                           MachinePointerInfo(), MachinePointerInfo());
2165     ByValArgs.push_back(FIPtr);
2166   }
2167 
2168   if (!IsTailCall)
2169     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
2170 
2171   // Copy argument values to their designated locations.
2172   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2173   SmallVector<SDValue, 8> MemOpChains;
2174   SDValue StackPtr;
2175   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
2176     CCValAssign &VA = ArgLocs[i];
2177     SDValue ArgValue = OutVals[i];
2178     ISD::ArgFlagsTy Flags = Outs[i].Flags;
2179 
2180     // Handle passing f64 on RV32D with a soft float ABI as a special case.
2181     bool IsF64OnRV32DSoftABI =
2182         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
2183     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
2184       SDValue SplitF64 = DAG.getNode(
2185           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
2186       SDValue Lo = SplitF64.getValue(0);
2187       SDValue Hi = SplitF64.getValue(1);
2188 
2189       Register RegLo = VA.getLocReg();
2190       RegsToPass.push_back(std::make_pair(RegLo, Lo));
2191 
2192       if (RegLo == RISCV::X17) {
2193         // Second half of f64 is passed on the stack.
2194         // Work out the address of the stack slot.
2195         if (!StackPtr.getNode())
2196           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
2197         // Emit the store.
2198         MemOpChains.push_back(
2199             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
2200       } else {
2201         // Second half of f64 is passed in another GPR.
2202         assert(RegLo < RISCV::X31 && "Invalid register pair");
2203         Register RegHigh = RegLo + 1;
2204         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
2205       }
2206       continue;
2207     }
2208 
2209     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
2210     // as any other MemLoc.
2211 
2212     // Promote the value if needed.
2213     // For now, only handle fully promoted and indirect arguments.
2214     if (VA.getLocInfo() == CCValAssign::Indirect) {
2215       // Store the argument in a stack slot and pass its address.
2216       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
2217       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2218       MemOpChains.push_back(
2219           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2220                        MachinePointerInfo::getFixedStack(MF, FI)));
2221       // If the original argument was split (e.g. i128), we need
2222       // to store all parts of it here (and pass just one address).
2223       unsigned ArgIndex = Outs[i].OrigArgIndex;
2224       assert(Outs[i].PartOffset == 0);
2225       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
2226         SDValue PartValue = OutVals[i + 1];
2227         unsigned PartOffset = Outs[i + 1].PartOffset;
2228         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2229                                       DAG.getIntPtrConstant(PartOffset, DL));
2230         MemOpChains.push_back(
2231             DAG.getStore(Chain, DL, PartValue, Address,
2232                          MachinePointerInfo::getFixedStack(MF, FI)));
2233         ++i;
2234       }
2235       ArgValue = SpillSlot;
2236     } else {
2237       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
2238     }
2239 
2240     // Use local copy if it is a byval arg.
2241     if (Flags.isByVal())
2242       ArgValue = ByValArgs[j++];
2243 
2244     if (VA.isRegLoc()) {
2245       // Queue up the argument copies and emit them at the end.
2246       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2247     } else {
2248       assert(VA.isMemLoc() && "Argument not register or memory");
2249       assert(!IsTailCall && "Tail call not allowed if stack is used "
2250                             "for passing parameters");
2251 
2252       // Work out the address of the stack slot.
2253       if (!StackPtr.getNode())
2254         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
2255       SDValue Address =
2256           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2257                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
2258 
2259       // Emit the store.
2260       MemOpChains.push_back(
2261           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2262     }
2263   }
2264 
2265   // Join the stores, which are independent of one another.
2266   if (!MemOpChains.empty())
2267     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2268 
2269   SDValue Glue;
2270 
2271   // Build a sequence of copy-to-reg nodes, chained and glued together.
2272   for (auto &Reg : RegsToPass) {
2273     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
2274     Glue = Chain.getValue(1);
2275   }
2276 
2277   // Validate that none of the argument registers have been marked as
  // reserved; if so, report an error. Do the same for the return address if
  // this is not a tail call.
2280   validateCCReservedRegs(RegsToPass, MF);
2281   if (!IsTailCall &&
2282       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
2283     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
2284         MF.getFunction(),
2285         "Return address register required, but has been reserved."});
2286 
2287   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
2288   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it, and then the direct call can be matched by PseudoCALL.
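  // Symbols that cannot be assumed DSO-local are marked MO_PLT, allowing the
  // call to go through the procedure linkage table if necessary.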
2290   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
2291     const GlobalValue *GV = S->getGlobal();
2292 
2293     unsigned OpFlags = RISCVII::MO_CALL;
2294     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
2295       OpFlags = RISCVII::MO_PLT;
2296 
2297     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
2298   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2299     unsigned OpFlags = RISCVII::MO_CALL;
2300 
2301     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
2302                                                  nullptr))
2303       OpFlags = RISCVII::MO_PLT;
2304 
2305     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
2306   }
2307 
2308   // The first call operand is the chain and the second is the target address.
2309   SmallVector<SDValue, 8> Ops;
2310   Ops.push_back(Chain);
2311   Ops.push_back(Callee);
2312 
2313   // Add argument registers to the end of the list so that they are
2314   // known live into the call.
2315   for (auto &Reg : RegsToPass)
2316     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2317 
2318   if (!IsTailCall) {
2319     // Add a register mask operand representing the call-preserved registers.
2320     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2321     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2322     assert(Mask && "Missing call preserved mask for calling convention");
2323     Ops.push_back(DAG.getRegisterMask(Mask));
2324   }
2325 
2326   // Glue the call to the argument copies, if any.
2327   if (Glue.getNode())
2328     Ops.push_back(Glue);
2329 
2330   // Emit the call.
2331   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2332 
2333   if (IsTailCall) {
2334     MF.getFrameInfo().setHasTailCall();
2335     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
2336   }
2337 
2338   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
2339   Glue = Chain.getValue(1);
2340 
2341   // Mark the end of the call, which is glued to the call itself.
2342   Chain = DAG.getCALLSEQ_END(Chain,
2343                              DAG.getConstant(NumBytes, DL, PtrVT, true),
2344                              DAG.getConstant(0, DL, PtrVT, true),
2345                              Glue, DL);
2346   Glue = Chain.getValue(1);
2347 
2348   // Assign locations to each value returned by this call.
2349   SmallVector<CCValAssign, 16> RVLocs;
2350   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
2351   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
2352 
2353   // Copy all of the result registers out of their specified physreg.
2354   for (auto &VA : RVLocs) {
2355     // Copy the value out
2356     SDValue RetValue =
2357         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
2358     // Glue the RetValue to the end of the call sequence
2359     Chain = RetValue.getValue(1);
2360     Glue = RetValue.getValue(2);
2361 
2362     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
2363       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
2364       SDValue RetValue2 =
2365           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
2366       Chain = RetValue2.getValue(1);
2367       Glue = RetValue2.getValue(2);
2368       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
2369                              RetValue2);
2370     }
2371 
2372     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
2373 
2374     InVals.push_back(RetValue);
2375   }
2376 
2377   return Chain;
2378 }
2379 
2380 bool RISCVTargetLowering::CanLowerReturn(
2381     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2382     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2383   SmallVector<CCValAssign, 16> RVLocs;
2384   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
2385   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2386     MVT VT = Outs[i].VT;
2387     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
2388     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
2389     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
2390                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
2391       return false;
2392   }
2393   return true;
2394 }
2395 
2396 SDValue
2397 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2398                                  bool IsVarArg,
2399                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
2400                                  const SmallVectorImpl<SDValue> &OutVals,
2401                                  const SDLoc &DL, SelectionDAG &DAG) const {
2402   const MachineFunction &MF = DAG.getMachineFunction();
2403   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
2404 
2405   // Stores the assignment of the return value to a location.
2406   SmallVector<CCValAssign, 16> RVLocs;
2407 
2408   // Info about the registers and stack slot.
2409   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
2410                  *DAG.getContext());
2411 
2412   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
2413                     nullptr);
2414 
2415   SDValue Glue;
2416   SmallVector<SDValue, 4> RetOps(1, Chain);
2417 
2418   // Copy the result values into the output registers.
2419   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
2420     SDValue Val = OutVals[i];
2421     CCValAssign &VA = RVLocs[i];
2422     assert(VA.isRegLoc() && "Can only return in registers!");
2423 
2424     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
2425       // Handle returning f64 on RV32D with a soft float ABI.
2426       assert(VA.isRegLoc() && "Expected return via registers");
2427       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
2428                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
2429       SDValue Lo = SplitF64.getValue(0);
2430       SDValue Hi = SplitF64.getValue(1);
2431       Register RegLo = VA.getLocReg();
2432       assert(RegLo < RISCV::X31 && "Invalid register pair");
2433       Register RegHi = RegLo + 1;
2434 
2435       if (STI.isRegisterReservedByUser(RegLo) ||
2436           STI.isRegisterReservedByUser(RegHi))
2437         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
2438             MF.getFunction(),
2439             "Return value register required, but has been reserved."});
2440 
2441       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
2442       Glue = Chain.getValue(1);
2443       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
2444       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
2445       Glue = Chain.getValue(1);
2446       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
2447     } else {
2448       // Handle a 'normal' return.
2449       Val = convertValVTToLocVT(DAG, Val, VA, DL);
2450       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
2451 
2452       if (STI.isRegisterReservedByUser(VA.getLocReg()))
2453         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
2454             MF.getFunction(),
2455             "Return value register required, but has been reserved."});
2456 
2457       // Guarantee that all emitted copies are stuck together.
2458       Glue = Chain.getValue(1);
2459       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2460     }
2461   }
2462 
2463   RetOps[0] = Chain; // Update chain.
2464 
2465   // Add the glue node if we have it.
2466   if (Glue.getNode()) {
2467     RetOps.push_back(Glue);
2468   }
2469 
2470   // Interrupt service routines use different return instructions.
2471   const Function &Func = DAG.getMachineFunction().getFunction();
2472   if (Func.hasFnAttribute("interrupt")) {
2473     if (!Func.getReturnType()->isVoidTy())
2474       report_fatal_error(
2475           "Functions with the interrupt attribute must have void return type!");
2476 
2477     MachineFunction &MF = DAG.getMachineFunction();
2478     StringRef Kind =
2479       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
2480 
2481     unsigned RetOpc;
2482     if (Kind == "user")
2483       RetOpc = RISCVISD::URET_FLAG;
2484     else if (Kind == "supervisor")
2485       RetOpc = RISCVISD::SRET_FLAG;
2486     else
2487       RetOpc = RISCVISD::MRET_FLAG;
2488 
2489     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
2490   }
2491 
2492   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
2493 }
2494 
2495 void RISCVTargetLowering::validateCCReservedRegs(
2496     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
2497     MachineFunction &MF) const {
2498   const Function &F = MF.getFunction();
2499   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
2500 
2501   if (std::any_of(std::begin(Regs), std::end(Regs), [&STI](auto Reg) {
2502         return STI.isRegisterReservedByUser(Reg.first);
2503       }))
2504     F.getContext().diagnose(DiagnosticInfoUnsupported{
2505         F, "Argument register required, but has been reserved."});
2506 }
2507 
2508 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2509   return CI->isTailCall();
2510 }
2511 
2512 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
2513   switch ((RISCVISD::NodeType)Opcode) {
2514   case RISCVISD::FIRST_NUMBER:
2515     break;
2516   case RISCVISD::RET_FLAG:
2517     return "RISCVISD::RET_FLAG";
2518   case RISCVISD::URET_FLAG:
2519     return "RISCVISD::URET_FLAG";
2520   case RISCVISD::SRET_FLAG:
2521     return "RISCVISD::SRET_FLAG";
2522   case RISCVISD::MRET_FLAG:
2523     return "RISCVISD::MRET_FLAG";
2524   case RISCVISD::CALL:
2525     return "RISCVISD::CALL";
2526   case RISCVISD::SELECT_CC:
2527     return "RISCVISD::SELECT_CC";
2528   case RISCVISD::BuildPairF64:
2529     return "RISCVISD::BuildPairF64";
2530   case RISCVISD::SplitF64:
2531     return "RISCVISD::SplitF64";
2532   case RISCVISD::TAIL:
2533     return "RISCVISD::TAIL";
2534   case RISCVISD::SLLW:
2535     return "RISCVISD::SLLW";
2536   case RISCVISD::SRAW:
2537     return "RISCVISD::SRAW";
2538   case RISCVISD::SRLW:
2539     return "RISCVISD::SRLW";
2540   case RISCVISD::DIVW:
2541     return "RISCVISD::DIVW";
2542   case RISCVISD::DIVUW:
2543     return "RISCVISD::DIVUW";
2544   case RISCVISD::REMUW:
2545     return "RISCVISD::REMUW";
2546   case RISCVISD::FMV_W_X_RV64:
2547     return "RISCVISD::FMV_W_X_RV64";
2548   case RISCVISD::FMV_X_ANYEXTW_RV64:
2549     return "RISCVISD::FMV_X_ANYEXTW_RV64";
2550   case RISCVISD::READ_CYCLE_WIDE:
2551     return "RISCVISD::READ_CYCLE_WIDE";
2552   }
2553   return nullptr;
2554 }
2555 
2556 /// getConstraintType - Given a constraint letter, return the type of
2557 /// constraint it is for this target.
2558 RISCVTargetLowering::ConstraintType
2559 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
2560   if (Constraint.size() == 1) {
2561     switch (Constraint[0]) {
2562     default:
2563       break;
2564     case 'f':
2565       return C_RegisterClass;
2566     case 'I':
2567     case 'J':
2568     case 'K':
2569       return C_Immediate;
2570     case 'A':
2571       return C_Memory;
2572     }
2573   }
2574   return TargetLowering::getConstraintType(Constraint);
2575 }
2576 
2577 std::pair<unsigned, const TargetRegisterClass *>
2578 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2579                                                   StringRef Constraint,
2580                                                   MVT VT) const {
2581   // First, see if this is a constraint that directly corresponds to a
2582   // RISCV register class.
2583   if (Constraint.size() == 1) {
2584     switch (Constraint[0]) {
2585     case 'r':
2586       return std::make_pair(0U, &RISCV::GPRRegClass);
2587     case 'f':
2588       if (Subtarget.hasStdExtF() && VT == MVT::f32)
2589         return std::make_pair(0U, &RISCV::FPR32RegClass);
2590       if (Subtarget.hasStdExtD() && VT == MVT::f64)
2591         return std::make_pair(0U, &RISCV::FPR64RegClass);
2592       break;
2593     default:
2594       break;
2595     }
2596   }
2597 
2598   // Clang will correctly decode the usage of register name aliases into their
2599   // official names. However, other frontends like `rustc` do not. This allows
2600   // users of these frontends to use the ABI names for registers in LLVM-style
2601   // register constraints.
  Register XRegFromAlias = StringSwitch<Register>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // TargetLowering::getRegForInlineAsmConstraint matches constraint names
  // against the name of the TableGen record rather than the AsmName, and we
  // also want to map each name to the widest floating-point register type
  // available, so floating-point registers are selected manually here.
  //
  // The second name in each case below is the register's ABI name, so that
  // frontends can also use the ABI names in register constraint lists.
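  //
  // For example (illustrative only), the constraint "{fa0}" (or "{f10}") is
  // mapped below to F10_D and the FPR64 register class when the D extension
  // is present, and to F10_F and the FPR32 register class when only F is
  // available.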
  if (Subtarget.hasStdExtF() || Subtarget.hasStdExtD()) {
    std::pair<Register, Register> FReg =
        StringSwitch<std::pair<Register, Register>>(Constraint.lower())
            .Cases("{f0}", "{ft0}", {RISCV::F0_F, RISCV::F0_D})
            .Cases("{f1}", "{ft1}", {RISCV::F1_F, RISCV::F1_D})
            .Cases("{f2}", "{ft2}", {RISCV::F2_F, RISCV::F2_D})
            .Cases("{f3}", "{ft3}", {RISCV::F3_F, RISCV::F3_D})
            .Cases("{f4}", "{ft4}", {RISCV::F4_F, RISCV::F4_D})
            .Cases("{f5}", "{ft5}", {RISCV::F5_F, RISCV::F5_D})
            .Cases("{f6}", "{ft6}", {RISCV::F6_F, RISCV::F6_D})
            .Cases("{f7}", "{ft7}", {RISCV::F7_F, RISCV::F7_D})
            .Cases("{f8}", "{fs0}", {RISCV::F8_F, RISCV::F8_D})
            .Cases("{f9}", "{fs1}", {RISCV::F9_F, RISCV::F9_D})
            .Cases("{f10}", "{fa0}", {RISCV::F10_F, RISCV::F10_D})
            .Cases("{f11}", "{fa1}", {RISCV::F11_F, RISCV::F11_D})
            .Cases("{f12}", "{fa2}", {RISCV::F12_F, RISCV::F12_D})
            .Cases("{f13}", "{fa3}", {RISCV::F13_F, RISCV::F13_D})
            .Cases("{f14}", "{fa4}", {RISCV::F14_F, RISCV::F14_D})
            .Cases("{f15}", "{fa5}", {RISCV::F15_F, RISCV::F15_D})
            .Cases("{f16}", "{fa6}", {RISCV::F16_F, RISCV::F16_D})
            .Cases("{f17}", "{fa7}", {RISCV::F17_F, RISCV::F17_D})
            .Cases("{f18}", "{fs2}", {RISCV::F18_F, RISCV::F18_D})
            .Cases("{f19}", "{fs3}", {RISCV::F19_F, RISCV::F19_D})
            .Cases("{f20}", "{fs4}", {RISCV::F20_F, RISCV::F20_D})
            .Cases("{f21}", "{fs5}", {RISCV::F21_F, RISCV::F21_D})
            .Cases("{f22}", "{fs6}", {RISCV::F22_F, RISCV::F22_D})
            .Cases("{f23}", "{fs7}", {RISCV::F23_F, RISCV::F23_D})
            .Cases("{f24}", "{fs8}", {RISCV::F24_F, RISCV::F24_D})
            .Cases("{f25}", "{fs9}", {RISCV::F25_F, RISCV::F25_D})
            .Cases("{f26}", "{fs10}", {RISCV::F26_F, RISCV::F26_D})
            .Cases("{f27}", "{fs11}", {RISCV::F27_F, RISCV::F27_D})
            .Cases("{f28}", "{ft8}", {RISCV::F28_F, RISCV::F28_D})
            .Cases("{f29}", "{ft9}", {RISCV::F29_F, RISCV::F29_D})
            .Cases("{f30}", "{ft10}", {RISCV::F30_F, RISCV::F30_D})
            .Cases("{f31}", "{ft11}", {RISCV::F31_F, RISCV::F31_D})
            .Default({RISCV::NoRegister, RISCV::NoRegister});
    if (FReg.first != RISCV::NoRegister)
      return Subtarget.hasStdExtD()
                 ? std::make_pair(FReg.second, &RISCV::FPR64RegClass)
                 : std::make_pair(FReg.first, &RISCV::FPR32RegClass);
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently, only single-character constraints are supported.
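  // The 'A' constraint denotes a memory operand whose address is held in a
  // general-purpose register. As an illustrative (not exhaustive) example,
  // C code such as
  //   __asm__ volatile ("amoswap.w %0, %2, %1"
  //                     : "=r"(old), "+A"(*lock) : "r"(val));
  // reaches this hook with ConstraintCode == "A".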
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently, only single-character constraints are supported.
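  //
  // As an illustrative (not exhaustive) example, C code such as
  //   __asm__ volatile ("addi %0, %1, %2" : "=r"(out) : "r"(in), "I"(-16));
  // reaches this hook for the "I" operand; the constant is validated against
  // the 12-bit signed immediate range before a target constant is emitted.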
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

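// The two hooks below place the explicit fences used for atomic loads and
// stores: a seq_cst load keeps a leading fence with its own ordering, a
// release (or stronger) store gets a leading release fence, and an acquire
// (or stronger) load gets a trailing acquire fence. As a rough illustration,
// a seq_cst i32 load is then emitted as a fence rw,rw, the lw itself, and a
// trailing fence r,rw.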
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  // Sub-word (8- and 16-bit) operations are expanded to a word-sized LR/SC
  // sequence via the masked atomicrmw intrinsics.
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

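// Map an atomicrmw operation to the corresponding riscv.masked.atomicrmw.*
// intrinsic for the given XLEN. For example (illustrative only), an i8
// 'atomicrmw add' on RV64 is routed through
// @llvm.riscv.masked.atomicrmw.add.i64 by emitMaskedAtomicRMWIntrinsic below.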
static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
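  //
  // For example (illustrative only): on RV32, an i8 field placed at bit
  // offset 16 (ShiftAmt = 16) gives SextShamt = 32 - 16 - 8 = 8; shifting the
  // loaded word left and then arithmetically right by 8 leaves the 8-bit
  // field sign-extended into the bits above it before the signed min/max
  // comparison.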
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  // As with atomicrmw, sub-word (8- and 16-bit) cmpxchg operations are
  // expanded to a word-sized LR/SC sequence via a masked intrinsic.
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

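// Emit a call to the riscv.masked.cmpxchg.{i32,i64} intrinsic, which performs
// a sub-word compare-and-exchange within an aligned, word-sized location. For
// example (illustrative only), an i16 cmpxchg on RV64 becomes a call to
// @llvm.riscv.masked.cmpxchg.i64 with the compare value, new value and mask
// sign-extended to i64.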
Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

unsigned RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

unsigned RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress unnecessary extensions when a libcall argument
  // or return value has f32 type under the LP64 ABI.
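  //
  // For example (illustrative only), a call to the soft-float helper
  // __addsf3 under lp64 passes and returns the f32 bit pattern in a GPR;
  // extending it to 64 bits first would add no value.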
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

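// Resolve a register name, given either as an ABI alias or as the
// architectural x-name, as used by named-register intrinsics such as
// llvm.read_register / llvm.write_register. For example (illustrative only),
// IR such as
//   call i64 @llvm.read_register.i64(metadata !{!"tp"})
// queries this hook with RegName == "tp"; the register must be reserved,
// either always or explicitly by the user, or a fatal error is reported.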
Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}