//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that RISCV uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "RISCVISelLowering.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/SelectionDAGISel.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/CodeGen/ValueTypes.h"
30 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/DiagnosticPrinter.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/raw_ostream.h"
35 
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "riscv-lower"
39 
40 STATISTIC(NumTailCalls, "Number of tail calls");
41 
42 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
43                                          const RISCVSubtarget &STI)
44     : TargetLowering(TM), Subtarget(STI) {
45 
46   MVT XLenVT = Subtarget.getXLenVT();
47 
48   // Set up the register classes.
49   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
50 
51   if (Subtarget.hasStdExtF())
52     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
53   if (Subtarget.hasStdExtD())
54     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
55 
56   // Compute derived properties from the register classes.
57   computeRegisterProperties(STI.getRegisterInfo());
58 
59   setStackPointerRegisterToSaveRestore(RISCV::X2);
60 
61   for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
62     setLoadExtAction(N, XLenVT, MVT::i1, Promote);
63 
64   // TODO: add all necessary setOperationAction calls.
65   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
66 
67   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
68   setOperationAction(ISD::BR_CC, XLenVT, Expand);
69   setOperationAction(ISD::SELECT, XLenVT, Custom);
70   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
71 
72   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
73   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
74 
75   setOperationAction(ISD::VASTART, MVT::Other, Custom);
76   setOperationAction(ISD::VAARG, MVT::Other, Expand);
77   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
78   setOperationAction(ISD::VAEND, MVT::Other, Expand);
79 
80   for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
81     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
82 
83   if (!Subtarget.hasStdExtM()) {
84     setOperationAction(ISD::MUL, XLenVT, Expand);
85     setOperationAction(ISD::MULHS, XLenVT, Expand);
86     setOperationAction(ISD::MULHU, XLenVT, Expand);
87     setOperationAction(ISD::SDIV, XLenVT, Expand);
88     setOperationAction(ISD::UDIV, XLenVT, Expand);
89     setOperationAction(ISD::SREM, XLenVT, Expand);
90     setOperationAction(ISD::UREM, XLenVT, Expand);
91   }
92 
93   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
94   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
95   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
96   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
97 
98   setOperationAction(ISD::SHL_PARTS, XLenVT, Expand);
99   setOperationAction(ISD::SRL_PARTS, XLenVT, Expand);
100   setOperationAction(ISD::SRA_PARTS, XLenVT, Expand);
101 
102   setOperationAction(ISD::ROTL, XLenVT, Expand);
103   setOperationAction(ISD::ROTR, XLenVT, Expand);
104   setOperationAction(ISD::BSWAP, XLenVT, Expand);
105   setOperationAction(ISD::CTTZ, XLenVT, Expand);
106   setOperationAction(ISD::CTLZ, XLenVT, Expand);
107   setOperationAction(ISD::CTPOP, XLenVT, Expand);
108 
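  // FP condition codes and operations that have no single corresponding
  // instruction in the F/D extensions; when the relevant extension is
  // enabled, they are expanded below (to other comparisons or to libcalls).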
109   ISD::CondCode FPCCToExtend[] = {
110       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO,   ISD::SETUEQ,
111       ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
112       ISD::SETGT,  ISD::SETGE,  ISD::SETNE};
113 
114   ISD::NodeType FPOpToExtend[] = {
115       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};
116 
117   if (Subtarget.hasStdExtF()) {
118     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
119     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
120     for (auto CC : FPCCToExtend)
121       setCondCodeAction(CC, MVT::f32, Expand);
122     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
123     setOperationAction(ISD::SELECT, MVT::f32, Custom);
124     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
125     for (auto Op : FPOpToExtend)
126       setOperationAction(Op, MVT::f32, Expand);
127   }
128 
129   if (Subtarget.hasStdExtD()) {
130     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
131     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
132     for (auto CC : FPCCToExtend)
133       setCondCodeAction(CC, MVT::f64, Expand);
134     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
135     setOperationAction(ISD::SELECT, MVT::f64, Custom);
136     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
137     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
138     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
139     for (auto Op : FPOpToExtend)
140       setOperationAction(Op, MVT::f64, Expand);
141   }
142 
143   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
144   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
145   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
146 
147   if (Subtarget.hasStdExtA()) {
148     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
149     setMinCmpXchgSizeInBits(32);
150   } else {
151     setMaxAtomicSizeInBitsSupported(0);
152   }
153 
154   setBooleanContents(ZeroOrOneBooleanContent);
155 
  // Function alignments (log2): 2 bytes when the C extension is enabled,
  // 4 bytes otherwise.
157   unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2;
158   setMinFunctionAlignment(FunctionAlignment);
159   setPrefFunctionAlignment(FunctionAlignment);
160 
161   // Effectively disable jump table generation.
162   setMinimumJumpTableEntries(INT_MAX);
163 }
164 
165 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
166                                             EVT VT) const {
167   if (!VT.isVector())
168     return getPointerTy(DL);
169   return VT.changeVectorElementTypeToInteger();
170 }
171 
172 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
173                                              const CallInst &I,
174                                              MachineFunction &MF,
175                                              unsigned Intrinsic) const {
176   switch (Intrinsic) {
177   default:
178     return false;
179   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
180   case Intrinsic::riscv_masked_atomicrmw_add_i32:
181   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
182   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
183   case Intrinsic::riscv_masked_atomicrmw_max_i32:
184   case Intrinsic::riscv_masked_atomicrmw_min_i32:
185   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
186   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
187   case Intrinsic::riscv_masked_cmpxchg_i32:
188     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
189     Info.opc = ISD::INTRINSIC_W_CHAIN;
190     Info.memVT = MVT::getVT(PtrTy->getElementType());
191     Info.ptrVal = I.getArgOperand(0);
192     Info.offset = 0;
193     Info.align = 4;
194     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
195                  MachineMemOperand::MOVolatile;
196     return true;
197   }
198 }
199 
200 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
201                                                 const AddrMode &AM, Type *Ty,
202                                                 unsigned AS,
203                                                 Instruction *I) const {
204   // No global is ever allowed as a base.
205   if (AM.BaseGV)
206     return false;
207 
208   // Require a 12-bit signed offset.
209   if (!isInt<12>(AM.BaseOffs))
210     return false;
211 
212   switch (AM.Scale) {
213   case 0: // "r+i" or just "i", depending on HasBaseReg.
214     break;
215   case 1:
216     if (!AM.HasBaseReg) // allow "r+i".
217       break;
218     return false; // disallow "r+r" or "r+r+i".
219   default:
220     return false;
221   }
222 
223   return true;
224 }
225 
226 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
227   return isInt<12>(Imm);
228 }
229 
230 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
231   return isInt<12>(Imm);
232 }
233 
234 // On RV32, 64-bit integers are split into their high and low parts and held
235 // in two different registers, so the trunc is free since the low register can
236 // just be used.
237 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
238   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
239     return false;
240   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
241   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
242   return (SrcBits == 64 && DestBits == 32);
243 }
244 
245 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
246   if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
247       !SrcVT.isInteger() || !DstVT.isInteger())
248     return false;
249   unsigned SrcBits = SrcVT.getSizeInBits();
250   unsigned DestBits = DstVT.getSizeInBits();
251   return (SrcBits == 64 && DestBits == 32);
252 }
253 
254 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
255   // Zexts are free if they can be combined with a load.
256   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
257     EVT MemVT = LD->getMemoryVT();
258     if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
259          (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
260         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
261          LD->getExtensionType() == ISD::ZEXTLOAD))
262       return true;
263   }
264 
265   return TargetLowering::isZExtFree(Val, VT2);
266 }
267 
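// On RV64, sign-extending an i32 to i64 is typically free (the W-suffixed
// instructions already produce sign-extended results), whereas zero-extension
// requires extra instructions, so prefer sign-extension.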
268 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
269   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
270 }
271 
272 // Changes the condition code and swaps operands if necessary, so the SetCC
273 // operation matches one of the comparisons supported directly in the RISC-V
274 // ISA.
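// For example, (setgt lhs, rhs) becomes (setlt rhs, lhs), which maps directly
// onto the BLT/SLT family of instructions.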
275 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
276   switch (CC) {
277   default:
278     break;
279   case ISD::SETGT:
280   case ISD::SETLE:
281   case ISD::SETUGT:
282   case ISD::SETULE:
283     CC = ISD::getSetCCSwappedOperands(CC);
284     std::swap(LHS, RHS);
285     break;
286   }
287 }
288 
289 // Return the RISC-V branch opcode that matches the given DAG integer
290 // condition code. The CondCode must be one of those supported by the RISC-V
291 // ISA (see normaliseSetCC).
292 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
293   switch (CC) {
294   default:
295     llvm_unreachable("Unsupported CondCode");
296   case ISD::SETEQ:
297     return RISCV::BEQ;
298   case ISD::SETNE:
299     return RISCV::BNE;
300   case ISD::SETLT:
301     return RISCV::BLT;
302   case ISD::SETGE:
303     return RISCV::BGE;
304   case ISD::SETULT:
305     return RISCV::BLTU;
306   case ISD::SETUGE:
307     return RISCV::BGEU;
308   }
309 }
310 
311 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
312                                             SelectionDAG &DAG) const {
313   switch (Op.getOpcode()) {
314   default:
315     report_fatal_error("unimplemented operand");
316   case ISD::GlobalAddress:
317     return lowerGlobalAddress(Op, DAG);
318   case ISD::BlockAddress:
319     return lowerBlockAddress(Op, DAG);
320   case ISD::ConstantPool:
321     return lowerConstantPool(Op, DAG);
322   case ISD::SELECT:
323     return lowerSELECT(Op, DAG);
324   case ISD::VASTART:
325     return lowerVASTART(Op, DAG);
326   case ISD::FRAMEADDR:
327     return lowerFRAMEADDR(Op, DAG);
328   case ISD::RETURNADDR:
329     return lowerRETURNADDR(Op, DAG);
330   }
331 }
332 
333 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
334                                                 SelectionDAG &DAG) const {
335   SDLoc DL(Op);
336   EVT Ty = Op.getValueType();
337   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
338   const GlobalValue *GV = N->getGlobal();
339   int64_t Offset = N->getOffset();
340   MVT XLenVT = Subtarget.getXLenVT();
341 
342   if (isPositionIndependent())
343     report_fatal_error("Unable to lowerGlobalAddress");
  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it into the global address node. Later peephole optimisations may choose
  // to fold it back in when profitable.
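  // For a non-zero offset, this typically selects to something like:
  //   lui  rd, %hi(sym)
  //   addi rd, rd, %lo(sym)
  //   addi rd, rd, offset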
348   SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI);
349   SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
350   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
351   SDValue MNLo =
352     SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
353   if (Offset != 0)
354     return DAG.getNode(ISD::ADD, DL, Ty, MNLo,
355                        DAG.getConstant(Offset, DL, XLenVT));
356   return MNLo;
357 }
358 
359 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
360                                                SelectionDAG &DAG) const {
361   SDLoc DL(Op);
362   EVT Ty = Op.getValueType();
363   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
364   const BlockAddress *BA = N->getBlockAddress();
365   int64_t Offset = N->getOffset();
366 
367   if (isPositionIndependent())
368     report_fatal_error("Unable to lowerBlockAddress");
369 
370   SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
371   SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO);
372   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0);
373   SDValue MNLo =
374     SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0);
375   return MNLo;
376 }
377 
378 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
379                                                SelectionDAG &DAG) const {
380   SDLoc DL(Op);
381   EVT Ty = Op.getValueType();
382   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
383   const Constant *CPA = N->getConstVal();
384   int64_t Offset = N->getOffset();
385   unsigned Alignment = N->getAlignment();
386 
387   if (!isPositionIndependent()) {
388     SDValue CPAHi =
389         DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI);
390     SDValue CPALo =
391         DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO);
392     SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0);
393     SDValue MNLo =
394         SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0);
395     return MNLo;
396   } else {
397     report_fatal_error("Unable to lowerConstantPool");
398   }
399 }
400 
401 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
402   SDValue CondV = Op.getOperand(0);
403   SDValue TrueV = Op.getOperand(1);
404   SDValue FalseV = Op.getOperand(2);
405   SDLoc DL(Op);
406   MVT XLenVT = Subtarget.getXLenVT();
407 
408   // If the result type is XLenVT and CondV is the output of a SETCC node
409   // which also operated on XLenVT inputs, then merge the SETCC node into the
410   // lowered RISCVISD::SELECT_CC to take advantage of the integer
411   // compare+branch instructions. i.e.:
412   // (select (setcc lhs, rhs, cc), truev, falsev)
413   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
414   if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
415       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
416     SDValue LHS = CondV.getOperand(0);
417     SDValue RHS = CondV.getOperand(1);
418     auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
419     ISD::CondCode CCVal = CC->get();
420 
421     normaliseSetCC(LHS, RHS, CCVal);
422 
423     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
424     SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
425     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
426     return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
427   }
428 
429   // Otherwise:
430   // (select condv, truev, falsev)
431   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
432   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
433   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
434 
435   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
436   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
437 
438   return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
439 }
440 
441 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
442   MachineFunction &MF = DAG.getMachineFunction();
443   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
444 
445   SDLoc DL(Op);
446   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
447                                  getPointerTy(MF.getDataLayout()));
448 
449   // vastart just stores the address of the VarArgsFrameIndex slot into the
450   // memory location argument.
451   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
452   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
453                       MachinePointerInfo(SV));
454 }
455 
456 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
457                                             SelectionDAG &DAG) const {
458   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
459   MachineFunction &MF = DAG.getMachineFunction();
460   MachineFrameInfo &MFI = MF.getFrameInfo();
461   MFI.setFrameAddressIsTaken(true);
462   unsigned FrameReg = RI.getFrameRegister(MF);
463   int XLenInBytes = Subtarget.getXLen() / 8;
464 
465   EVT VT = Op.getValueType();
466   SDLoc DL(Op);
467   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
468   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
469   while (Depth--) {
470     int Offset = -(XLenInBytes * 2);
471     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
472                               DAG.getIntPtrConstant(Offset, DL));
473     FrameAddr =
474         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
475   }
476   return FrameAddr;
477 }
478 
479 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
480                                              SelectionDAG &DAG) const {
481   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
482   MachineFunction &MF = DAG.getMachineFunction();
483   MachineFrameInfo &MFI = MF.getFrameInfo();
484   MFI.setReturnAddressIsTaken(true);
485   MVT XLenVT = Subtarget.getXLenVT();
486   int XLenInBytes = Subtarget.getXLen() / 8;
487 
488   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
489     return SDValue();
490 
491   EVT VT = Op.getValueType();
492   SDLoc DL(Op);
493   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
494   if (Depth) {
495     int Off = -XLenInBytes;
496     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
497     SDValue Offset = DAG.getConstant(Off, DL, VT);
498     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
499                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
500                        MachinePointerInfo());
501   }
502 
503   // Return the value of the return address register, marking it an implicit
504   // live-in.
505   unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
506   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
507 }
508 
509 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
510                                                DAGCombinerInfo &DCI) const {
511   switch (N->getOpcode()) {
512   default:
513     break;
514   case RISCVISD::SplitF64: {
515     // If the input to SplitF64 is just BuildPairF64 then the operation is
516     // redundant. Instead, use BuildPairF64's operands directly.
517     SDValue Op0 = N->getOperand(0);
518     if (Op0->getOpcode() != RISCVISD::BuildPairF64)
519       break;
520     return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
521   }
522   }
523 
524   return SDValue();
525 }
526 
527 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
528                                              MachineBasicBlock *BB) {
529   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
530 
531   MachineFunction &MF = *BB->getParent();
532   DebugLoc DL = MI.getDebugLoc();
533   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
534   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
535   unsigned LoReg = MI.getOperand(0).getReg();
536   unsigned HiReg = MI.getOperand(1).getReg();
537   unsigned SrcReg = MI.getOperand(2).getReg();
538   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
539   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
540 
541   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
542                           RI);
543   MachineMemOperand *MMO =
544       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
545                               MachineMemOperand::MOLoad, 8, 8);
546   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
547       .addFrameIndex(FI)
548       .addImm(0)
549       .addMemOperand(MMO);
550   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
551       .addFrameIndex(FI)
552       .addImm(4)
553       .addMemOperand(MMO);
554   MI.eraseFromParent(); // The pseudo instruction is gone now.
555   return BB;
556 }
557 
558 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
559                                                  MachineBasicBlock *BB) {
560   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
561          "Unexpected instruction");
562 
563   MachineFunction &MF = *BB->getParent();
564   DebugLoc DL = MI.getDebugLoc();
565   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
566   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
567   unsigned DstReg = MI.getOperand(0).getReg();
568   unsigned LoReg = MI.getOperand(1).getReg();
569   unsigned HiReg = MI.getOperand(2).getReg();
570   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
571   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
572 
573   MachineMemOperand *MMO =
574       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
575                               MachineMemOperand::MOStore, 8, 8);
576   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
577       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
578       .addFrameIndex(FI)
579       .addImm(0)
580       .addMemOperand(MMO);
581   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
582       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
583       .addFrameIndex(FI)
584       .addImm(4)
585       .addMemOperand(MMO);
586   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
587   MI.eraseFromParent(); // The pseudo instruction is gone now.
588   return BB;
589 }
590 
591 MachineBasicBlock *
592 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
593                                                  MachineBasicBlock *BB) const {
594   switch (MI.getOpcode()) {
595   default:
596     llvm_unreachable("Unexpected instr type to insert");
597   case RISCV::Select_GPR_Using_CC_GPR:
598   case RISCV::Select_FPR32_Using_CC_GPR:
599   case RISCV::Select_FPR64_Using_CC_GPR:
600     break;
601   case RISCV::BuildPairF64Pseudo:
602     return emitBuildPairF64Pseudo(MI, BB);
603   case RISCV::SplitF64Pseudo:
604     return emitSplitF64Pseudo(MI, BB);
605   }
606 
607   // To "insert" a SELECT instruction, we actually have to insert the triangle
608   // control-flow pattern.  The incoming instruction knows the destination vreg
609   // to set, the condition code register to branch on, the true/false values to
610   // select between, and the condcode to use to select the appropriate branch.
611   //
612   // We produce the following control flow:
613   //     HeadMBB
614   //     |  \
615   //     |  IfFalseMBB
616   //     | /
617   //    TailMBB
618   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
619   const BasicBlock *LLVM_BB = BB->getBasicBlock();
620   DebugLoc DL = MI.getDebugLoc();
621   MachineFunction::iterator I = ++BB->getIterator();
622 
623   MachineBasicBlock *HeadMBB = BB;
624   MachineFunction *F = BB->getParent();
625   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
626   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
627 
628   F->insert(I, IfFalseMBB);
629   F->insert(I, TailMBB);
630   // Move all remaining instructions to TailMBB.
631   TailMBB->splice(TailMBB->begin(), HeadMBB,
632                   std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end());
633   // Update machine-CFG edges by transferring all successors of the current
634   // block to the new block which will contain the Phi node for the select.
635   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
636   // Set the successors for HeadMBB.
637   HeadMBB->addSuccessor(IfFalseMBB);
638   HeadMBB->addSuccessor(TailMBB);
639 
640   // Insert appropriate branch.
641   unsigned LHS = MI.getOperand(1).getReg();
642   unsigned RHS = MI.getOperand(2).getReg();
643   auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
644   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
645 
646   BuildMI(HeadMBB, DL, TII.get(Opcode))
647     .addReg(LHS)
648     .addReg(RHS)
649     .addMBB(TailMBB);
650 
651   // IfFalseMBB just falls through to TailMBB.
652   IfFalseMBB->addSuccessor(TailMBB);
653 
654   // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
655   BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI),
656           MI.getOperand(0).getReg())
657       .addReg(MI.getOperand(4).getReg())
658       .addMBB(HeadMBB)
659       .addReg(MI.getOperand(5).getReg())
660       .addMBB(IfFalseMBB);
661 
662   MI.eraseFromParent(); // The pseudo instruction is gone now.
663   return TailMBB;
664 }
665 
666 // Calling Convention Implementation.
667 // The expectations for frontend ABI lowering vary from target to target.
668 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
669 // details, but this is a longer term goal. For now, we simply try to keep the
670 // role of the frontend as simple and well-defined as possible. The rules can
671 // be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
//   passed in a pair of registers (fp+fp, int+fp), and both registers are
//   available, then pass as two separate arguments. If either the GPRs or
//   FPRs are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
//   slot (as it is larger than 2*XLEN and the floating point rules don't
//   apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
//   word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
//   not based on its size and fields. If it will be returned by reference,
//   the frontend must modify the prototype so a pointer with the sret
//   annotation is passed as the first argument. This is not necessary for
//   large scalar returns.
// * Struct return values and varargs should be coerced to structs containing
//   register-size fields in the same situations they would be for fixed
//   arguments.
690 
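// The GPRs used for passing arguments in the standard calling convention:
// X10-X17 correspond to the ABI argument registers a0-a7.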
691 static const MCPhysReg ArgGPRs[] = {
692   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
693   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
694 };
695 
696 // Pass a 2*XLEN argument that has been split into two XLEN values through
697 // registers or the stack as necessary.
698 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
699                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
700                                 MVT ValVT2, MVT LocVT2,
701                                 ISD::ArgFlagsTy ArgFlags2) {
702   unsigned XLenInBytes = XLen / 8;
703   if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
704     // At least one half can be passed via register.
705     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
706                                      VA1.getLocVT(), CCValAssign::Full));
707   } else {
708     // Both halves must be passed on the stack, with proper alignment.
709     unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
710     State.addLoc(
711         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
712                             State.AllocateStack(XLenInBytes, StackAlign),
713                             VA1.getLocVT(), CCValAssign::Full));
714     State.addLoc(CCValAssign::getMem(
715         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
716         CCValAssign::Full));
717     return false;
718   }
719 
720   if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
721     // The second half can also be passed via register.
722     State.addLoc(
723         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
724   } else {
725     // The second half is passed via the stack, without additional alignment.
726     State.addLoc(CCValAssign::getMem(
727         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
728         CCValAssign::Full));
729   }
730 
731   return false;
732 }
733 
734 // Implements the RISC-V calling convention. Returns true upon failure.
735 static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
736                      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
737                      CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) {
738   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
739   assert(XLen == 32 || XLen == 64);
740   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
741   if (ValVT == MVT::f32) {
742     LocVT = MVT::i32;
743     LocInfo = CCValAssign::BCvt;
744   }
745 
  // Any return value split into more than two values can't be returned
  // directly.
748   if (IsRet && ValNo > 1)
749     return true;
750 
751   // If this is a variadic argument, the RISC-V calling convention requires
752   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
753   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
754   // be used regardless of whether the original argument was split during
755   // legalisation or not. The argument will not be passed by registers if the
756   // original type is larger than 2*XLEN, so the register alignment rule does
757   // not apply.
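  // For example, a variadic double on RV32 must start in an even-numbered
  // argument register (a0, a2, ...), so an odd-numbered register such as a1
  // may be skipped and left unused.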
758   unsigned TwoXLenInBytes = (2 * XLen) / 8;
759   if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
760       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
761     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
762     // Skip 'odd' register if necessary.
763     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
764       State.AllocateReg(ArgGPRs);
765   }
766 
767   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
768   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
769       State.getPendingArgFlags();
770 
771   assert(PendingLocs.size() == PendingArgFlags.size() &&
772          "PendingLocs and PendingArgFlags out of sync");
773 
774   // Handle passing f64 on RV32D with a soft float ABI.
775   if (XLen == 32 && ValVT == MVT::f64) {
776     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
777            "Can't lower f64 if it is split");
778     // Depending on available argument GPRS, f64 may be passed in a pair of
779     // GPRs, split between a GPR and the stack, or passed completely on the
780     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
781     // cases.
782     unsigned Reg = State.AllocateReg(ArgGPRs);
783     LocVT = MVT::i32;
784     if (!Reg) {
785       unsigned StackOffset = State.AllocateStack(8, 8);
786       State.addLoc(
787           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
788       return false;
789     }
790     if (!State.AllocateReg(ArgGPRs))
791       State.AllocateStack(4, 4);
792     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
793     return false;
794   }
795 
796   // Split arguments might be passed indirectly, so keep track of the pending
797   // values.
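  // For example, an i128 argument on RV32 is legalised into four i32 parts;
  // all parts are collected here and, as there are more than two, the
  // argument is ultimately passed indirectly (see below).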
798   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
799     LocVT = XLenVT;
800     LocInfo = CCValAssign::Indirect;
801     PendingLocs.push_back(
802         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
803     PendingArgFlags.push_back(ArgFlags);
804     if (!ArgFlags.isSplitEnd()) {
805       return false;
806     }
807   }
808 
809   // If the split argument only had two elements, it should be passed directly
810   // in registers or on the stack.
811   if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
812     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
813     // Apply the normal calling convention rules to the first half of the
814     // split argument.
815     CCValAssign VA = PendingLocs[0];
816     ISD::ArgFlagsTy AF = PendingArgFlags[0];
817     PendingLocs.clear();
818     PendingArgFlags.clear();
819     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
820                                ArgFlags);
821   }
822 
823   // Allocate to a register if possible, or else a stack slot.
824   unsigned Reg = State.AllocateReg(ArgGPRs);
825   unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);
826 
827   // If we reach this point and PendingLocs is non-empty, we must be at the
828   // end of a split argument that must be passed indirectly.
829   if (!PendingLocs.empty()) {
830     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
831     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
832 
833     for (auto &It : PendingLocs) {
834       if (Reg)
835         It.convertToReg(Reg);
836       else
837         It.convertToMem(StackOffset);
838       State.addLoc(It);
839     }
840     PendingLocs.clear();
841     PendingArgFlags.clear();
842     return false;
843   }
844 
845   assert(LocVT == XLenVT && "Expected an XLenVT at this stage");
846 
847   if (Reg) {
848     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
849     return false;
850   }
851 
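  // When an f32 is passed on the stack rather than in a GPR, the i32 bitcast
  // applied above is unnecessary; pass it directly as an f32.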
852   if (ValVT == MVT::f32) {
853     LocVT = MVT::f32;
854     LocInfo = CCValAssign::Full;
855   }
856   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
857   return false;
858 }
859 
860 void RISCVTargetLowering::analyzeInputArgs(
861     MachineFunction &MF, CCState &CCInfo,
862     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
863   unsigned NumArgs = Ins.size();
864   FunctionType *FType = MF.getFunction().getFunctionType();
865 
866   for (unsigned i = 0; i != NumArgs; ++i) {
867     MVT ArgVT = Ins[i].VT;
868     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
869 
870     Type *ArgTy = nullptr;
871     if (IsRet)
872       ArgTy = FType->getReturnType();
873     else if (Ins[i].isOrigArg())
874       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
875 
    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
878       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
879                         << EVT(ArgVT).getEVTString() << '\n');
880       llvm_unreachable(nullptr);
881     }
882   }
883 }
884 
885 void RISCVTargetLowering::analyzeOutputArgs(
886     MachineFunction &MF, CCState &CCInfo,
887     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
888     CallLoweringInfo *CLI) const {
889   unsigned NumArgs = Outs.size();
890 
891   for (unsigned i = 0; i != NumArgs; i++) {
892     MVT ArgVT = Outs[i].VT;
893     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
894     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
895 
896     if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
897                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
898       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
899                         << EVT(ArgVT).getEVTString() << "\n");
900       llvm_unreachable(nullptr);
901     }
902   }
903 }
904 
905 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
906 // values.
907 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
908                                    const CCValAssign &VA, const SDLoc &DL) {
909   switch (VA.getLocInfo()) {
910   default:
911     llvm_unreachable("Unexpected CCValAssign::LocInfo");
912   case CCValAssign::Full:
913     break;
914   case CCValAssign::BCvt:
915     Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
916     break;
917   }
918   return Val;
919 }
920 
921 // The caller is responsible for loading the full value if the argument is
922 // passed with CCValAssign::Indirect.
923 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
924                                 const CCValAssign &VA, const SDLoc &DL) {
925   MachineFunction &MF = DAG.getMachineFunction();
926   MachineRegisterInfo &RegInfo = MF.getRegInfo();
927   EVT LocVT = VA.getLocVT();
928   SDValue Val;
929 
930   unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
931   RegInfo.addLiveIn(VA.getLocReg(), VReg);
932   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
933 
934   if (VA.getLocInfo() == CCValAssign::Indirect)
935     return Val;
936 
937   return convertLocVTToValVT(DAG, Val, VA, DL);
938 }
939 
940 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
941                                    const CCValAssign &VA, const SDLoc &DL) {
942   EVT LocVT = VA.getLocVT();
943 
944   switch (VA.getLocInfo()) {
945   default:
946     llvm_unreachable("Unexpected CCValAssign::LocInfo");
947   case CCValAssign::Full:
948     break;
949   case CCValAssign::BCvt:
950     Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
951     break;
952   }
953   return Val;
954 }
955 
956 // The caller is responsible for loading the full value if the argument is
957 // passed with CCValAssign::Indirect.
958 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
959                                 const CCValAssign &VA, const SDLoc &DL) {
960   MachineFunction &MF = DAG.getMachineFunction();
961   MachineFrameInfo &MFI = MF.getFrameInfo();
962   EVT LocVT = VA.getLocVT();
963   EVT ValVT = VA.getValVT();
964   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
965   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
966                                  VA.getLocMemOffset(), /*Immutable=*/true);
967   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
968   SDValue Val;
969 
970   ISD::LoadExtType ExtType;
971   switch (VA.getLocInfo()) {
972   default:
973     llvm_unreachable("Unexpected CCValAssign::LocInfo");
974   case CCValAssign::Full:
975   case CCValAssign::Indirect:
976     ExtType = ISD::NON_EXTLOAD;
977     break;
978   }
979   Val = DAG.getExtLoad(
980       ExtType, DL, LocVT, Chain, FIN,
981       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
982   return Val;
983 }
984 
985 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
986                                        const CCValAssign &VA, const SDLoc &DL) {
987   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
988          "Unexpected VA");
989   MachineFunction &MF = DAG.getMachineFunction();
990   MachineFrameInfo &MFI = MF.getFrameInfo();
991   MachineRegisterInfo &RegInfo = MF.getRegInfo();
992 
993   if (VA.isMemLoc()) {
994     // f64 is passed on the stack.
995     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
996     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
997     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
998                        MachinePointerInfo::getFixedStack(MF, FI));
999   }
1000 
1001   assert(VA.isRegLoc() && "Expected register VA assignment");
1002 
1003   unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1004   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
1005   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
1006   SDValue Hi;
1007   if (VA.getLocReg() == RISCV::X17) {
1008     // Second half of f64 is passed on the stack.
1009     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
1010     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1011     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
1012                      MachinePointerInfo::getFixedStack(MF, FI));
1013   } else {
1014     // Second half of f64 is passed in another GPR.
1015     unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1016     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
1017     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
1018   }
1019   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1020 }
1021 
1022 // Transform physical registers into virtual registers.
1023 SDValue RISCVTargetLowering::LowerFormalArguments(
1024     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1025     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1026     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1027 
1028   switch (CallConv) {
1029   default:
1030     report_fatal_error("Unsupported calling convention");
1031   case CallingConv::C:
1032   case CallingConv::Fast:
1033     break;
1034   }
1035 
1036   MachineFunction &MF = DAG.getMachineFunction();
1037 
1038   const Function &Func = MF.getFunction();
1039   if (Func.hasFnAttribute("interrupt")) {
1040     if (!Func.arg_empty())
1041       report_fatal_error(
1042         "Functions with the interrupt attribute cannot have arguments!");
1043 
1044     StringRef Kind =
1045       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
1046 
1047     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
1048       report_fatal_error(
1049         "Function interrupt attribute argument not supported!");
1050   }
1051 
1052   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1053   MVT XLenVT = Subtarget.getXLenVT();
1054   unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
1056   std::vector<SDValue> OutChains;
1057 
1058   // Assign locations to all of the incoming arguments.
1059   SmallVector<CCValAssign, 16> ArgLocs;
1060   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1061   analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
1062 
1063   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1064     CCValAssign &VA = ArgLocs[i];
1065     SDValue ArgValue;
1066     // Passing f64 on RV32D with a soft float ABI must be handled as a special
1067     // case.
1068     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
1069       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
1070     else if (VA.isRegLoc())
1071       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
1072     else
1073       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
1074 
1075     if (VA.getLocInfo() == CCValAssign::Indirect) {
1076       // If the original argument was split and passed by reference (e.g. i128
1077       // on RV32), we need to load all parts of it here (using the same
1078       // address).
1079       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1080                                    MachinePointerInfo()));
1081       unsigned ArgIndex = Ins[i].OrigArgIndex;
1082       assert(Ins[i].PartOffset == 0);
1083       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
1084         CCValAssign &PartVA = ArgLocs[i + 1];
1085         unsigned PartOffset = Ins[i + 1].PartOffset;
1086         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1087                                       DAG.getIntPtrConstant(PartOffset, DL));
1088         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1089                                      MachinePointerInfo()));
1090         ++i;
1091       }
1092       continue;
1093     }
1094     InVals.push_back(ArgValue);
1095   }
1096 
1097   if (IsVarArg) {
1098     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
1099     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
1100     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
1101     MachineFrameInfo &MFI = MF.getFrameInfo();
1102     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1103     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1104 
1105     // Offset of the first variable argument from stack pointer, and size of
1106     // the vararg save area. For now, the varargs save area is either zero or
1107     // large enough to hold a0-a7.
1108     int VaArgOffset, VarArgsSaveSize;
1109 
1110     // If all registers are allocated, then all varargs must be passed on the
1111     // stack and we don't need to save any argregs.
1112     if (ArgRegs.size() == Idx) {
1113       VaArgOffset = CCInfo.getNextStackOffset();
1114       VarArgsSaveSize = 0;
1115     } else {
1116       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
1117       VaArgOffset = -VarArgsSaveSize;
1118     }
1119 
    // Record the frame index of the first variable argument, which is needed
    // by VASTART.
1122     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
1123     RVFI->setVarArgsFrameIndex(FI);
1124 
1125     // If saving an odd number of registers then create an extra stack slot to
1126     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
1128     if (Idx % 2) {
1129       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
1130                                  true);
1131       VarArgsSaveSize += XLenInBytes;
1132     }
1133 
1134     // Copy the integer registers that may have been used for passing varargs
1135     // to the vararg save area.
1136     for (unsigned I = Idx; I < ArgRegs.size();
1137          ++I, VaArgOffset += XLenInBytes) {
1138       const unsigned Reg = RegInfo.createVirtualRegister(RC);
1139       RegInfo.addLiveIn(ArgRegs[I], Reg);
1140       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
1141       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
1142       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1143       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
1144                                    MachinePointerInfo::getFixedStack(MF, FI));
1145       cast<StoreSDNode>(Store.getNode())
1146           ->getMemOperand()
1147           ->setValue((Value *)nullptr);
1148       OutChains.push_back(Store);
1149     }
1150     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
1151   }
1152 
1153   // All stores are grouped in one node to allow the matching between
1154   // the size of Ins and InVals. This only happens for vararg functions.
1155   if (!OutChains.empty()) {
1156     OutChains.push_back(Chain);
1157     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1158   }
1159 
1160   return Chain;
1161 }
1162 
1163 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
1164 /// for tail call optimization.
1165 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
1166 bool RISCVTargetLowering::IsEligibleForTailCallOptimization(
1167   CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
1168   const SmallVector<CCValAssign, 16> &ArgLocs) const {
1169 
1170   auto &Callee = CLI.Callee;
1171   auto CalleeCC = CLI.CallConv;
1172   auto IsVarArg = CLI.IsVarArg;
1173   auto &Outs = CLI.Outs;
1174   auto &Caller = MF.getFunction();
1175   auto CallerCC = Caller.getCallingConv();
1176 
1177   // Do not tail call opt functions with "disable-tail-calls" attribute.
1178   if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
1179     return false;
1180 
1181   // Exception-handling functions need a special set of instructions to
1182   // indicate a return to the hardware. Tail-calling another function would
1183   // probably break this.
1184   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
1185   // should be expanded as new function attributes are introduced.
1186   if (Caller.hasFnAttribute("interrupt"))
1187     return false;
1188 
1189   // Do not tail call opt functions with varargs.
1190   if (IsVarArg)
1191     return false;
1192 
1193   // Do not tail call opt if the stack is used to pass parameters.
1194   if (CCInfo.getNextStackOffset() != 0)
1195     return false;
1196 
1197   // Do not tail call opt if any parameters need to be passed indirectly.
1198   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
1199   // passed indirectly. So the address of the value will be passed in a
1200   // register, or if not available, then the address is put on the stack. In
1201   // order to pass indirectly, space on the stack often needs to be allocated
1202   // in order to store the value. In this case the CCInfo.getNextStackOffset()
  // != 0 check is not enough and we need to check if any CCValAssign ArgLocs
  // are assigned CCValAssign::Indirect.
1205   for (auto &VA : ArgLocs)
1206     if (VA.getLocInfo() == CCValAssign::Indirect)
1207       return false;
1208 
1209   // Do not tail call opt if either caller or callee uses struct return
1210   // semantics.
1211   auto IsCallerStructRet = Caller.hasStructRetAttr();
1212   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
1213   if (IsCallerStructRet || IsCalleeStructRet)
1214     return false;
1215 
1216   // Externally-defined functions with weak linkage should not be
1217   // tail-called. The behaviour of branch instructions in this situation (as
1218   // used for tail calls) is implementation-defined, so we cannot rely on the
1219   // linker replacing the tail call with a return.
1220   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1221     const GlobalValue *GV = G->getGlobal();
1222     if (GV->hasExternalWeakLinkage())
1223       return false;
1224   }
1225 
1226   // The callee has to preserve all registers the caller needs to preserve.
1227   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
1228   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1229   if (CalleeCC != CallerCC) {
1230     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
1231     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
1232       return false;
1233   }
1234 
1235   // Byval parameters hand the function a pointer directly into the stack area
1236   // we want to reuse during a tail call. Working around this *is* possible
1237   // but less efficient and uglier in LowerCall.
1238   for (auto &Arg : Outs)
1239     if (Arg.Flags.isByVal())
1240       return false;
1241 
1242   return true;
1243 }
1244 
1245 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
1246 // and output parameter nodes.
1247 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
1248                                        SmallVectorImpl<SDValue> &InVals) const {
1249   SelectionDAG &DAG = CLI.DAG;
1250   SDLoc &DL = CLI.DL;
1251   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1252   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1253   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1254   SDValue Chain = CLI.Chain;
1255   SDValue Callee = CLI.Callee;
1256   bool &IsTailCall = CLI.IsTailCall;
1257   CallingConv::ID CallConv = CLI.CallConv;
1258   bool IsVarArg = CLI.IsVarArg;
1259   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1260   MVT XLenVT = Subtarget.getXLenVT();
1261 
1262   MachineFunction &MF = DAG.getMachineFunction();
1263 
1264   // Analyze the operands of the call, assigning locations to each operand.
1265   SmallVector<CCValAssign, 16> ArgLocs;
1266   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1267   analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
1268 
1269   // Check if it's really possible to do a tail call.
1270   if (IsTailCall)
1271     IsTailCall = IsEligibleForTailCallOptimization(ArgCCInfo, CLI, MF,
1272                                                    ArgLocs);
1273 
1274   if (IsTailCall)
1275     ++NumTailCalls;
1276   else if (CLI.CS && CLI.CS.isMustTailCall())
1277     report_fatal_error("failed to perform tail call elimination on a call "
1278                        "site marked musttail");
1279 
1280   // Get a count of how many bytes are to be pushed on the stack.
1281   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1282 
1283   // Create local copies for byval args
1284   SmallVector<SDValue, 8> ByValArgs;
1285   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
1286     ISD::ArgFlagsTy Flags = Outs[i].Flags;
1287     if (!Flags.isByVal())
1288       continue;
1289 
1290     SDValue Arg = OutVals[i];
1291     unsigned Size = Flags.getByValSize();
1292     unsigned Align = Flags.getByValAlign();
1293 
1294     int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
1295     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1296     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
1297 
1298     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
1299                           /*IsVolatile=*/false,
1300                           /*AlwaysInline=*/false,
1301                           IsTailCall, MachinePointerInfo(),
1302                           MachinePointerInfo());
1303     ByValArgs.push_back(FIPtr);
1304   }
1305 
1306   if (!IsTailCall)
1307     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
1308 
1309   // Copy argument values to their designated locations.
1310   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
1311   SmallVector<SDValue, 8> MemOpChains;
1312   SDValue StackPtr;
1313   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
1314     CCValAssign &VA = ArgLocs[i];
1315     SDValue ArgValue = OutVals[i];
1316     ISD::ArgFlagsTy Flags = Outs[i].Flags;
1317 
1318     // Handle passing f64 on RV32D with a soft float ABI as a special case.
1319     bool IsF64OnRV32DSoftABI =
1320         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
1321     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
1322       SDValue SplitF64 = DAG.getNode(
1323           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
1324       SDValue Lo = SplitF64.getValue(0);
1325       SDValue Hi = SplitF64.getValue(1);
1326 
1327       unsigned RegLo = VA.getLocReg();
1328       RegsToPass.push_back(std::make_pair(RegLo, Lo));
1329 
1330       if (RegLo == RISCV::X17) {
1331         // Second half of f64 is passed on the stack.
1332         // Work out the address of the stack slot.
1333         if (!StackPtr.getNode())
1334           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
1335         // Emit the store.
1336         MemOpChains.push_back(
1337             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
1338       } else {
1339         // Second half of f64 is passed in another GPR.
1340         unsigned RegHigh = RegLo + 1;
1341         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
1342       }
1343       continue;
1344     }
1345 
1346     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
1347     // as any other MemLoc.
1348 
1349     // Promote the value if needed.
1350     // For now, only handle fully promoted and indirect arguments.
1351     if (VA.getLocInfo() == CCValAssign::Indirect) {
1352       // Store the argument in a stack slot and pass its address.
1353       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
1354       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1355       MemOpChains.push_back(
1356           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1357                        MachinePointerInfo::getFixedStack(MF, FI)));
1358       // If the original argument was split (e.g. i128), we need
1359       // to store all parts of it here (and pass just one address).
1360       unsigned ArgIndex = Outs[i].OrigArgIndex;
1361       assert(Outs[i].PartOffset == 0);
1362       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
1363         SDValue PartValue = OutVals[i + 1];
1364         unsigned PartOffset = Outs[i + 1].PartOffset;
1365         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1366                                       DAG.getIntPtrConstant(PartOffset, DL));
1367         MemOpChains.push_back(
1368             DAG.getStore(Chain, DL, PartValue, Address,
1369                          MachinePointerInfo::getFixedStack(MF, FI)));
1370         ++i;
1371       }
1372       ArgValue = SpillSlot;
1373     } else {
1374       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
1375     }
1376 
1377     // Use local copy if it is a byval arg.
1378     if (Flags.isByVal())
1379       ArgValue = ByValArgs[j++];
1380 
1381     if (VA.isRegLoc()) {
1382       // Queue up the argument copies and emit them at the end.
1383       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1384     } else {
1385       assert(VA.isMemLoc() && "Argument not register or memory");
1386       assert(!IsTailCall && "Tail call not allowed if stack is used "
1387                             "for passing parameters");
1388 
1389       // Work out the address of the stack slot.
1390       if (!StackPtr.getNode())
1391         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
1392       SDValue Address =
1393           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1394                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
1395 
1396       // Emit the store.
1397       MemOpChains.push_back(
1398           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1399     }
1400   }
1401 
1402   // Join the stores, which are independent of one another.
1403   if (!MemOpChains.empty())
1404     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1405 
1406   SDValue Glue;
1407 
1408   // Build a sequence of copy-to-reg nodes, chained and glued together.
1409   for (auto &Reg : RegsToPass) {
1410     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
1411     Glue = Chain.getValue(1);
1412   }
1413 
1414   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
1415   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
1416   // split it, and so that the direct call can be matched by PseudoCALL.
1417   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
1418     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
1419   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1420     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
1421   }
1422 
1423   // The first call operand is the chain and the second is the target address.
1424   SmallVector<SDValue, 8> Ops;
1425   Ops.push_back(Chain);
1426   Ops.push_back(Callee);
1427 
1428   // Add argument registers to the end of the list so that they are
1429   // known live into the call.
1430   for (auto &Reg : RegsToPass)
1431     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
1432 
1433   if (!IsTailCall) {
1434     // Add a register mask operand representing the call-preserved registers.
1435     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1436     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1437     assert(Mask && "Missing call preserved mask for calling convention");
1438     Ops.push_back(DAG.getRegisterMask(Mask));
1439   }
1440 
1441   // Glue the call to the argument copies, if any.
1442   if (Glue.getNode())
1443     Ops.push_back(Glue);
1444 
1445   // Emit the call.
1446   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1447 
1448   if (IsTailCall) {
1449     MF.getFrameInfo().setHasTailCall();
1450     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
1451   }
1452 
1453   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
1454   Glue = Chain.getValue(1);
1455 
1456   // Mark the end of the call, which is glued to the call itself.
1457   Chain = DAG.getCALLSEQ_END(Chain,
1458                              DAG.getConstant(NumBytes, DL, PtrVT, true),
1459                              DAG.getConstant(0, DL, PtrVT, true),
1460                              Glue, DL);
1461   Glue = Chain.getValue(1);
1462 
1463   // Assign locations to each value returned by this call.
1464   SmallVector<CCValAssign, 16> RVLocs;
1465   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
1466   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
1467 
1468   // Copy all of the result registers out of their specified physreg.
1469   for (auto &VA : RVLocs) {
1470     // Copy the value out of its physical register.
1471     SDValue RetValue =
1472         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
1473     // Glue this copy into the call sequence so later copies stay ordered.
1474     Chain = RetValue.getValue(1);
1475     Glue = RetValue.getValue(2);
1476 
1477     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
1478       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
1479       SDValue RetValue2 =
1480           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
1481       Chain = RetValue2.getValue(1);
1482       Glue = RetValue2.getValue(2);
1483       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
1484                              RetValue2);
1485     }
1486 
1487     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
1488 
1489     InVals.push_back(RetValue);
1490   }
1491 
1492   return Chain;
1493 }
1494 
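// A return can be lowered directly only if CC_RISCV assigns every return
// value to a register; otherwise the generic code falls back to returning the
// value indirectly through memory.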
1495 bool RISCVTargetLowering::CanLowerReturn(
1496     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
1497     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
1498   SmallVector<CCValAssign, 16> RVLocs;
1499   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
1500   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
1501     MVT VT = Outs[i].VT;
1502     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
1503     if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
1504                  CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
1505       return false;
1506   }
1507   return true;
1508 }
1509 
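// Lower outgoing return values into copies to the return registers and emit
// the matching return node: RET_FLAG normally, or URET/SRET/MRET for
// functions carrying the "interrupt" attribute.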
1510 SDValue
1511 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1512                                  bool IsVarArg,
1513                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
1514                                  const SmallVectorImpl<SDValue> &OutVals,
1515                                  const SDLoc &DL, SelectionDAG &DAG) const {
1516   // Stores the assignment of the return value to a location.
1517   SmallVector<CCValAssign, 16> RVLocs;
1518 
1519   // Info about the registers and stack slot.
1520   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
1521                  *DAG.getContext());
1522 
1523   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
1524                     nullptr);
1525 
1526   SDValue Glue;
1527   SmallVector<SDValue, 4> RetOps(1, Chain);
1528 
1529   // Copy the result values into the output registers.
1530   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
1531     SDValue Val = OutVals[i];
1532     CCValAssign &VA = RVLocs[i];
1533     assert(VA.isRegLoc() && "Can only return in registers!");
1534 
1535     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
1536       // Handle returning f64 on RV32D with a soft float ABI.
1537       assert(VA.isRegLoc() && "Expected return via registers");
1538       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
1539                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
1540       SDValue Lo = SplitF64.getValue(0);
1541       SDValue Hi = SplitF64.getValue(1);
1542       unsigned RegLo = VA.getLocReg();
1543       unsigned RegHi = RegLo + 1;
1544       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
1545       Glue = Chain.getValue(1);
1546       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
1547       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
1548       Glue = Chain.getValue(1);
1549       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
1550     } else {
1551       // Handle a 'normal' return.
1552       Val = convertValVTToLocVT(DAG, Val, VA, DL);
1553       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
1554 
1555       // Guarantee that all emitted copies are stuck together.
1556       Glue = Chain.getValue(1);
1557       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
1558     }
1559   }
1560 
1561   RetOps[0] = Chain; // Update chain.
1562 
1563   // Add the glue node if we have it.
1564   if (Glue.getNode()) {
1565     RetOps.push_back(Glue);
1566   }
1567 
1568   // Interrupt service routines use different return instructions.
1569   const Function &Func = DAG.getMachineFunction().getFunction();
1570   if (Func.hasFnAttribute("interrupt")) {
1571     if (!Func.getReturnType()->isVoidTy())
1572       report_fatal_error(
1573           "Functions with the interrupt attribute must have void return type!");
1574 
1575     MachineFunction &MF = DAG.getMachineFunction();
1576     StringRef Kind =
1577       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
1578 
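    // Select the return opcode from the interrupt kind; anything other than
    // "user" or "supervisor" is treated as a machine-mode handler.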
1579     unsigned RetOpc;
1580     if (Kind == "user")
1581       RetOpc = RISCVISD::URET_FLAG;
1582     else if (Kind == "supervisor")
1583       RetOpc = RISCVISD::SRET_FLAG;
1584     else
1585       RetOpc = RISCVISD::MRET_FLAG;
1586 
1587     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
1588   }
1589 
1590   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
1591 }
1592 
1593 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
1594   switch ((RISCVISD::NodeType)Opcode) {
1595   case RISCVISD::FIRST_NUMBER:
1596     break;
1597   case RISCVISD::RET_FLAG:
1598     return "RISCVISD::RET_FLAG";
1599   case RISCVISD::URET_FLAG:
1600     return "RISCVISD::URET_FLAG";
1601   case RISCVISD::SRET_FLAG:
1602     return "RISCVISD::SRET_FLAG";
1603   case RISCVISD::MRET_FLAG:
1604     return "RISCVISD::MRET_FLAG";
1605   case RISCVISD::CALL:
1606     return "RISCVISD::CALL";
1607   case RISCVISD::SELECT_CC:
1608     return "RISCVISD::SELECT_CC";
1609   case RISCVISD::BuildPairF64:
1610     return "RISCVISD::BuildPairF64";
1611   case RISCVISD::SplitF64:
1612     return "RISCVISD::SplitF64";
1613   case RISCVISD::TAIL:
1614     return "RISCVISD::TAIL";
1615   }
1616   return nullptr;
1617 }
1618 
1619 std::pair<unsigned, const TargetRegisterClass *>
1620 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1621                                                   StringRef Constraint,
1622                                                   MVT VT) const {
1623   // First, see if this is a constraint that directly corresponds to a
1624   // RISCV register class.
1625   if (Constraint.size() == 1) {
1626     switch (Constraint[0]) {
1627     case 'r':
1628       return std::make_pair(0U, &RISCV::GPRRegClass);
1629     default:
1630       break;
1631     }
1632   }
1633 
1634   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1635 }
1636 
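// Atomic loads and stores are lowered as ordinary memory accesses bracketed
// by fences. A seq_cst load gets a leading fence with its own ordering, and a
// release (or stronger) store gets a leading release fence; emitTrailingFence
// below supplies the matching trailing acquire fence for loads.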
1637 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
1638                                                    Instruction *Inst,
1639                                                    AtomicOrdering Ord) const {
1640   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
1641     return Builder.CreateFence(Ord);
1642   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
1643     return Builder.CreateFence(AtomicOrdering::Release);
1644   return nullptr;
1645 }
1646 
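// Pairs with emitLeadingFence: an acquire (or stronger) load is followed by
// an acquire fence.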
1647 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
1648                                                     Instruction *Inst,
1649                                                     AtomicOrdering Ord) const {
1650   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
1651     return Builder.CreateFence(AtomicOrdering::Acquire);
1652   return nullptr;
1653 }
1654 
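// Sub-word (8- and 16-bit) atomicrmw operations have no native AMO encoding,
// so expand them into a masked intrinsic that operates on the containing
// aligned word.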
1655 TargetLowering::AtomicExpansionKind
1656 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1657   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
1658   if (Size == 8 || Size == 16)
1659     return AtomicExpansionKind::MaskedIntrinsic;
1660   return AtomicExpansionKind::None;
1661 }
1662 
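// Map an atomicrmw binary operation to the corresponding masked 32-bit RISC-V
// intrinsic.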
1663 static Intrinsic::ID
1664 getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) {
1665   switch (BinOp) {
1666   default:
1667     llvm_unreachable("Unexpected AtomicRMW BinOp");
1668   case AtomicRMWInst::Xchg:
1669     return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
1670   case AtomicRMWInst::Add:
1671     return Intrinsic::riscv_masked_atomicrmw_add_i32;
1672   case AtomicRMWInst::Sub:
1673     return Intrinsic::riscv_masked_atomicrmw_sub_i32;
1674   case AtomicRMWInst::Nand:
1675     return Intrinsic::riscv_masked_atomicrmw_nand_i32;
1676   case AtomicRMWInst::Max:
1677     return Intrinsic::riscv_masked_atomicrmw_max_i32;
1678   case AtomicRMWInst::Min:
1679     return Intrinsic::riscv_masked_atomicrmw_min_i32;
1680   case AtomicRMWInst::UMax:
1681     return Intrinsic::riscv_masked_atomicrmw_umax_i32;
1682   case AtomicRMWInst::UMin:
1683     return Intrinsic::riscv_masked_atomicrmw_umin_i32;
1684   }
1685 }
1686 
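// Build the call to the masked atomicrmw intrinsic. Operands are the aligned
// word address, the operand value, the mask identifying the sub-word field,
// an extra sign-extension shift amount for signed min/max, and the memory
// ordering encoded as an i32 constant.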
1687 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
1688     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
1689     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
1690   Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering()));
1691   Type *Tys[] = {AlignedAddr->getType()};
1692   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
1693       AI->getModule(),
1694       getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys);
1695 
1696   // Must pass the shift amount needed to sign extend the loaded value prior
1697   // to performing a signed comparison for min/max. ShiftAmt is the number of
1698   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
1699   // is the number of bits to left+right shift the value in order to
1700   // sign-extend.
1701   if (AI->getOperation() == AtomicRMWInst::Min ||
1702       AI->getOperation() == AtomicRMWInst::Max) {
1703     const DataLayout &DL = AI->getModule()->getDataLayout();
1704     unsigned ValWidth =
1705         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
1706     Value *SextShamt = Builder.CreateSub(
1707         Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt);
1708     return Builder.CreateCall(LrwOpScwLoop,
1709                               {AlignedAddr, Incr, Mask, SextShamt, Ordering});
1710   }
1711 
1712   return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
1713 }
1714 
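// As with atomicrmw, sub-word cmpxchg has no native encoding and is expanded
// into a masked intrinsic on the containing aligned word.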
1715 TargetLowering::AtomicExpansionKind
1716 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
1717     AtomicCmpXchgInst *CI) const {
1718   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
1719   if (Size == 8 || Size == 16)
1720     return AtomicExpansionKind::MaskedIntrinsic;
1721   return AtomicExpansionKind::None;
1722 }
1723 
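// Emit the masked cmpxchg intrinsic call: aligned word address, expected and
// new values, the sub-word mask, and the ordering encoded as an i32 constant.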
1724 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
1725     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
1726     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
1727   Value *Ordering = Builder.getInt32(static_cast<uint32_t>(Ord));
1728   Type *Tys[] = {AlignedAddr->getType()};
1729   Function *MaskedCmpXchg = Intrinsic::getDeclaration(
1730       CI->getModule(), Intrinsic::riscv_masked_cmpxchg_i32, Tys);
1731   return Builder.CreateCall(MaskedCmpXchg,
1732                             {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
1733 }
1734